utf8 results from the CPAN

Apache-LoggedAuthDBI

view release on metacpan or search on metacpan

    my ($a, $b) = @_;
    unless (defined $a and defined $b) {             # one undef
	return ""
		if !defined $a and !defined $b;
	return "String a is undef, string b has ".length($b)." characters"
		if !defined $a;
	return "String b is undef, string a has ".length($a)." characters"
		if !defined $b;
    }

    require utf8;
    # hack to cater for perl 5.6
    *utf8::is_utf8 = sub { (DBI::neat(shift)=~/^"/) } unless defined &utf8::is_utf8;

    my @a_chars = (utf8::is_utf8($a)) ? unpack("U*", $a) : unpack("C*", $a);
    my @b_chars = (utf8::is_utf8($b)) ? unpack("U*", $b) : unpack("C*", $b);
    my $i = 0;
    while (@a_chars && @b_chars) {
	++$i, shift(@a_chars), shift(@b_chars), next
	    if $a_chars[0] == $b_chars[0];# compare ordinal values
	my @desc = map {
	    $_ > 255 ?                    # if wide character...
	      sprintf("\\x{%04X}", $_) :  # \x{...}
	      chr($_) =~ /[[:cntrl:]]/ ?  # else if control character ...
	      sprintf("\\x%02X", $_) :    # \x..
	      chr($_)                     # else as themselves

DBI.pm view on Meta::CPAN

    }
    return "String a truncated after $i characters" if @b_chars;
    return "String b truncated after $i characters" if @a_chars;
    return "";
}


sub data_string_desc {	# describe a data string
    my ($a) = @_;
    require bytes;
    require utf8;

    # hacks to cater for perl 5.6
    *utf8::is_utf8 = sub { (DBI::neat(shift)=~/^"/) } unless defined &utf8::is_utf8;
    *utf8::valid   = sub {                        1 } unless defined &utf8::valid;

    # Give sufficient info to help diagnose at least these kinds of situations:
    # - valid UTF8 byte sequence but UTF8 flag not set
    #   (might be ascii so also need to check for hibit to make it worthwhile)
    # - UTF8 flag set but invalid UTF8 byte sequence
    # could do better here, but this'll do for now
    my $utf8 = sprintf "UTF8 %s%s", 
	utf8::is_utf8($a) ? "on" : "off",
	utf8::valid($a||'') ? "" : " but INVALID encoding";
    return "$utf8, undef" unless defined $a;
    my $is_ascii = $a =~ m/^[\000-\177]*$/;
    return sprintf "%s, %s, %d characters %d bytes",
	$utf8, $is_ascii ? "ASCII" : "non-ASCII",
	length($a), bytes::length($a);
}


sub connect_test_perf {
    my($class, $dsn,$dbuser,$dbpass, $attr) = @_;
	Carp::croak("connect_test_perf needs hash ref as fourth arg") unless ref $attr;
    # these are non standard attributes just for this special method
    my $loops ||= $attr->{dbi_loops} || 5;
    my $par   ||= $attr->{dbi_par}   || 1;	# parallelism

DBI.pm view on Meta::CPAN

  $str = neat($value);
  $str = neat($value, $maxlen);

Return a string containing a neat (and tidy) representation of the
supplied value.

Strings will be quoted, although internal quotes will I<not> be escaped.
Values known to be numeric will be unquoted. Undefined (NULL) values
will be shown as C<undef> (without quotes).

If the string is flagged internally as utf8 then double quotes will
be used, otherwise single quotes are used and unprintable characters
will be replaced by dot (.).

For result strings longer than C<$maxlen> the result string will be
truncated to C<$maxlen-4> and "C<...'>" will be appended.  If C<$maxlen> is 0
or C<undef>, it defaults to C<$DBI::neat_maxlen> which, in turn, defaults to 400.

This function is designed to format values for human consumption.
It is used internally by the DBI for L</trace> output. It should
typically I<not> be used for formatting values for database use.

MD5.pm view on Meta::CPAN

data:

    use Digest::MD5 qw(md5_hex);

    my $str = "abc\x{300}";
    print md5_hex($str), "\n";  # croaks
    # Wide character in subroutine entry

What you can do is calculate the MD5 checksum of the UTF-8
representation of such strings.  This is achieved by filtering the
string through encode_utf8() function:

    use Digest::MD5 qw(md5_hex);
    use Encode qw(encode_utf8);

    my $str = "abc\x{300}";
    print md5_hex(encode_utf8($str)), "\n";
    # 8c2d46911f3f5a326455f0ed7a8ed3b3

=head1 SEE ALSO

L<Digest>,
L<Digest::MD2>,
L<Digest::SHA1>,
L<Digest::HMAC>

L<md5sum(1)>

( run in 0.228 second using v1.01-cache-2.11-cpan-0f795438458 )