Apache-Wyrd
view release on metacpan or search on metacpan
Wyrd/Services/SAK.pm view on Meta::CPAN
my $number = shift;
1 while ($number =~ s/^([-+]?\d+)(\d{3})/$1,$2/);
return $number;
}
=pod
=item (scalar) C<strip_html>(scalar)
Escape out entities and strip tags from a given string.
=cut
sub strip_html {
my ($data) = @_;
$data = decode_entities($data);
$data =~ s/<>//g; # Strip out all empty tags
$data =~ s/<--.*?-->/ /g; # Strip out all comments
$data =~ s/<[^>]*?>/ /g; # Strip out all HTML tags
return $data;
}
=pod
=item (scalar) C<utf8_force>(scalar)
Attempt to decode the text into UTF-8 by trying different common encodings
until one returns valid UTF-8.
=cut
sub utf8_force {
my ($text) = @_;
my $success = 0;
if (utf8::valid($text)) {
utf8::upgrade($text);
return $text;
}
for my $encoding (qw(windows-1252 MacRoman Latin-1 Latin-9)) {
my $trial_data = $text;
eval {
from_to($encoding, 'utf8', $trial_data, Encode::FB_HTMLCREF);
};
if (not($@) && utf8::valid($trial_data)) {
$text = $trial_data;
$success = 1;
last;
}
}
unless ($success) {
carp "Unable to encode as UTF8";
}
return $text;
}
=pod
=item (scalar) C<utf8_to_entities>(scalar)
Seek through the given text for Unicode byte sequences and replace them with
numbered entities for that unicode character. Assumes the text is properly-
formatted UTF8.
=cut
sub utf8_to_entities {
my ($text) = @_;
use Encode qw(_utf8_off);
_utf8_off($text);
while ($text =~ /(([\xC0-\xFF])([\x80-\xFF]{1,5}))/) {
#store the sequence for later;
my $unicode_sequence = $1;
#separate the first byte from the others
my ($first, $second) = ($2, $3);
#split remaining bytes and count them
my @parts = split '', $second;
my $count = @parts;
#remove the appropriate number of bits from the high end of the first
#byte (3 for 2 bytes, 4 for 3, etc) and use that for the first part of
#the 32-bit binary number
$first = substr(sprintf("%b", ord($first)), $count + 2, 6 - $count);
my $full = $first;
#Remove the two highest bits from the remaining bytes and concatenate
#the result with the first part
foreach my $part (@parts) {
$part = substr(sprintf("%b", ord($part)),2,6);
$full .= $part;
}
#Left-fill with zeroes to make a full 32 bit binary number
$full = substr(0 x 32 . $full, -32);
#Turn the binary number into a 32-bit unsigned integer value
my $hex_number = sprintf('%04X', unpack("N", pack("B32", $full)));
#Replace all instances of that byte sequence found in the text with a
#numbered entity sequence
$text =~ s/$unicode_sequence/&#x$hex_number;/g;
}
return $text;
}
=pod
=back
=head2 TAGS (:tag)
Tag-generation tools.
=over
=item (scalar) C<attopts_template> (array)
Creates a template of attribute options, given an array of the attributes.
=cut
sub attopts_template {
my @opts = @_;
my $string = '';
foreach my $opt (@opts) {
$string .= '?:' . $opt . '{ $:' . $opt . '}';
}
}
=pod
=back
=head1 AUTHOR
Barry King E<lt>wyrd@nospam.wyrdwright.comE<gt>
=head1 SEE ALSO
=over
=item Apache::Wyrd
General-purpose HTML-embeddable perl object
=back
=head1 LICENSE
Copyright 2002-2007 Wyrdwright, Inc. and licensed under the GNU GPL.
See LICENSE under the documentation for C<Apache::Wyrd>.
=cut
1;
( run in 1.768 second using v1.01-cache-2.11-cpan-5837b0d9d2c )