Apache-Wyrd

 view release on metacpan or  search on metacpan

Wyrd/Services/SAK.pm  view on Meta::CPAN

	my $number = shift;
	1 while ($number =~ s/^([-+]?\d+)(\d{3})/$1,$2/);
	return $number;
}

=pod

=item (scalar) C<strip_html>(scalar)

Escape out entities and strip tags from a given string.

=cut

sub strip_html {
	my ($data) = @_;
	$data = decode_entities($data);
	$data =~ s/<>//g; # Strip out all empty tags
	$data =~ s/<--.*?-->/ /g; # Strip out all comments
	$data =~ s/<[^>]*?>/ /g; # Strip out all HTML tags
	return $data;
}

=pod

=item (scalar) C<utf8_force>(scalar)

Attempt to decode the text into UTF-8 by trying different common encodings
until one returns valid UTF-8.

=cut

sub utf8_force {
	my ($text) = @_;
	my $success = 0;
	if (utf8::valid($text)) {
		utf8::upgrade($text);
		return $text;
	}
	for my $encoding (qw(windows-1252 MacRoman Latin-1 Latin-9)) {
		my $trial_data = $text;
		eval {
			from_to($encoding, 'utf8', $trial_data, Encode::FB_HTMLCREF);
		};
		if (not($@) && utf8::valid($trial_data)) {
			$text = $trial_data;
			$success = 1;
			last;
		}
	}
	unless ($success) {
		carp "Unable to encode as UTF8";
	}
	return $text;
}

=pod

=item (scalar) C<utf8_to_entities>(scalar)

Seek through the given text for Unicode byte sequences and replace them with
numbered entities for that unicode character.  Assumes the text is properly-
formatted UTF8.

=cut


sub utf8_to_entities {
	my ($text) = @_;
	use Encode qw(_utf8_off);
	_utf8_off($text);
	while ($text =~ /(([\xC0-\xFF])([\x80-\xFF]{1,5}))/) {

		#store the sequence for later;
		my $unicode_sequence = $1;

		#separate the first byte from the others
		my ($first, $second) = ($2, $3);

		#split remaining bytes and count them
		my @parts = split '', $second;
		my $count = @parts;

		#remove the appropriate number of bits from the high end of the first
		#byte (3 for 2 bytes, 4 for 3, etc) and use that for the first part of
		#the 32-bit binary number
		$first = substr(sprintf("%b", ord($first)), $count + 2, 6 - $count);
		my $full = $first;

		#Remove the two highest bits from the remaining bytes and concatenate
		#the result with the first part
		foreach my $part (@parts) {
			$part = substr(sprintf("%b", ord($part)),2,6);
			$full .= $part;
		}

		#Left-fill with zeroes to make a full 32 bit binary number
		$full =  substr(0 x 32 . $full, -32);

		#Turn the binary number into a 32-bit unsigned integer value
		my $hex_number = sprintf('%04X', unpack("N", pack("B32", $full)));

		#Replace all instances of that byte sequence found in the text with a
		#numbered entity sequence
		$text =~ s/$unicode_sequence/&#x$hex_number;/g;
	}
	return $text;
}

=pod

=back

=head2 TAGS (:tag)

Tag-generation tools.

=over

=item (scalar) C<attopts_template> (array)

Creates a template of attribute options, given an array of the attributes.

=cut

sub attopts_template {
	my @opts = @_;
	my $string = '';
	foreach my $opt (@opts) {
		$string .= '?:' . $opt . '{ $:' . $opt . '}';
	}
}

=pod

=back

=head1 AUTHOR

Barry King E<lt>wyrd@nospam.wyrdwright.comE<gt>

=head1 SEE ALSO

=over

=item Apache::Wyrd

General-purpose HTML-embeddable perl object

=back

=head1 LICENSE

Copyright 2002-2007 Wyrdwright, Inc. and licensed under the GNU GPL.

See LICENSE under the documentation for C<Apache::Wyrd>.

=cut

1;



( run in 1.768 second using v1.01-cache-2.11-cpan-5837b0d9d2c )