Apache-Wyrd

 view release on metacpan or  search on metacpan

Wyrd/Interfaces/Setter.pm  view on Meta::CPAN

	my ($self, $hash, $string) = @_;
	my $changed = 0; #toggle: if there is nothing left to change, it's time to return
	my $mode = 's'; #(s)eek a conditional (c)onfirm that it is a conditional, com(p)lete the expression
	my $state = '?'; #keep the argument or discard it
	my $buf = ''; #buffer for temp storage of the conditional
	my $out = ''; #buffer for the completed expression
	my $depth = 0; #how many layers of conditionals are we at?
	do {
		$changed = 0;
		foreach my $char (unpack('U*', $string)) {
			$char = chr($char);#returns unicode
			if ($mode eq 's') {#always begin by seeking
				if ($char eq '?' or $char eq '!') {
					$buf = '';
					$buf .= $char;
					$mode = 'c';
					$state = $char;
				} else {
					$out .= $char;
				}
			}

Wyrd/Services/Index.pm  view on Meta::CPAN

	$self->set_error("Could not delete key: " . $result);
	$self->check_error;
}

sub process_html {
	my ($self, $id, $data) = @_;

	return undef if ($self->{'runtime_flags'}->{'no_data'});

	#Remove all punctuation noise from the data and turn all control characters
	#and unicode into entities
	$data = $self->clean_html($data);

	#if we're doing bigfiles, we get a chance to override the re-indexing
	#of large swaths of data if there has been no change to the html of the
	#indexed object
	if ($self->bigfile and length($data) >= 2048) {
		$self->db->db_get("\x03\%$id", my $old_key);
		$old_key =~ s/^\x00://;
		my $current_key = sha1_hex($data);
		if ($current_key ne $old_key) {

Wyrd/Services/SAK.pm  view on Meta::CPAN

		carp "Unable to encode as UTF8";
	}
	return $text;
}

=pod

=item (scalar) C<utf8_to_entities>(scalar)

Scan the given text for multi-byte UTF-8 sequences and replace each one with
the hexadecimal numeric character entity (C<&#xHHHH;>) for that Unicode
character.  Assumes the text is well-formed UTF-8.

=cut


sub utf8_to_entities {
	my ($text) = @_;
	use Encode qw(_utf8_off);
	_utf8_off($text);
	while ($text =~ /(([\xC0-\xFF])([\x80-\xFF]{1,5}))/) {

		#store the sequence for later;
		my $unicode_sequence = $1;

		#separate the first byte from the others
		my ($first, $second) = ($2, $3);

		#split remaining bytes and count them
		my @parts = split '', $second;
		my $count = @parts;

		#remove the appropriate number of bits from the high end of the first
		#byte (3 for 2 bytes, 4 for 3, etc) and use that for the first part of

Wyrd/Services/SAK.pm  view on Meta::CPAN

		}

		#Left-fill with zeroes to make a full 32 bit binary number
		$full =  substr(0 x 32 . $full, -32);

		#Turn the binary number into a 32-bit unsigned integer value
		my $hex_number = sprintf('%04X', unpack("N", pack("B32", $full)));

		#Replace all instances of that byte sequence found in the text with a
		#numbered entity sequence
		$text =~ s/$unicode_sequence/&#x$hex_number;/g;
	}
	return $text;
}

=pod

=back

=head2 TAGS (:tag)



( run in 0.278 second using v1.01-cache-2.11-cpan-88abd93f124 )