utf8 results from the CPAN

Apache-LoggedAuthDBI

view release on metacpan or search on metacpan

}


sub neat_list {
    my ($listref, $maxlen, $sep) = @_;
    $maxlen = 0 unless defined $maxlen;	# 0 == use internal default
    $sep = ", " unless defined $sep;
    join($sep, map { neat($_,$maxlen) } @$listref);
}


sub dump_results {	# also aliased as a method in DBD::_::st
    my ($sth, $maxlen, $lsep, $fsep, $fh) = @_;
    return 0 unless $sth;
    $maxlen ||= 35;
    $lsep   ||= "\n";
    $fh ||= \*STDOUT;
    my $rows = 0;
    my $ref;
    while($ref = $sth->fetch) {
	print $fh $lsep if $rows++ and $lsep;
	my $str = neat_list($ref,$maxlen,$fsep);
	print $fh $str;	# done on two lines to avoid 5.003 errors
    }
    print $fh "\n$rows rows".($DBI::err ? " ($DBI::err: $DBI::errstr)" : "")."\n";
    $rows;
}


sub data_diff {
    my ($a, $b, $logical) = @_;

    my $diff   = data_string_diff($a, $b);
    return "" if $logical and !$diff;

    my $a_desc = data_string_desc($a);
    my $b_desc = data_string_desc($b);
    return "" if !$diff and $a_desc eq $b_desc;

    $diff ||= "Strings contain the same sequence of characters"
    	if length($a);
    $diff .= "\n" if $diff;
    return "a: $a_desc\nb: $b_desc\n$diff";
}
    

sub data_string_diff {
    # Compares 'logical' characters, not bytes, so a latin1 string and an
    # an equivalent unicode string will compare as equal even though their
    # byte encodings are different.
    my ($a, $b) = @_;
    unless (defined $a and defined $b) {             # one undef
	return ""
		if !defined $a and !defined $b;
	return "String a is undef, string b has ".length($b)." characters"
		if !defined $a;
	return "String b is undef, string a has ".length($a)." characters"
		if !defined $b;
    }

    require utf8;
    # hack to cater for perl 5.6
    *utf8::is_utf8 = sub { (DBI::neat(shift)=~/^"/) } unless defined &utf8::is_utf8;

    my @a_chars = (utf8::is_utf8($a)) ? unpack("U*", $a) : unpack("C*", $a);
    my @b_chars = (utf8::is_utf8($b)) ? unpack("U*", $b) : unpack("C*", $b);
    my $i = 0;
    while (@a_chars && @b_chars) {
	++$i, shift(@a_chars), shift(@b_chars), next
	    if $a_chars[0] == $b_chars[0];# compare ordinal values
	my @desc = map {
	    $_ > 255 ?                    # if wide character...
	      sprintf("\\x{%04X}", $_) :  # \x{...}
	      chr($_) =~ /[[:cntrl:]]/ ?  # else if control character ...
	      sprintf("\\x%02X", $_) :    # \x..
	      chr($_)                     # else as themselves
	} ($a_chars[0], $b_chars[0]);
	# highlight probable double-encoding?
        foreach my $c ( @desc ) {
	    next unless $c =~ m/\\x\{08(..)}/;
	    $c .= "='" .chr(hex($1)) ."'"
	}
	return sprintf "Strings differ at index $i: a[$i]=$desc[0], b[$i]=$desc[1]";
    }
    return "String a truncated after $i characters" if @b_chars;
    return "String b truncated after $i characters" if @a_chars;
    return "";
}


sub data_string_desc {	# describe a data string
    my ($a) = @_;
    require bytes;
    require utf8;

    # hacks to cater for perl 5.6
    *utf8::is_utf8 = sub { (DBI::neat(shift)=~/^"/) } unless defined &utf8::is_utf8;
    *utf8::valid   = sub {                        1 } unless defined &utf8::valid;

    # Give sufficient info to help diagnose at least these kinds of situations:
    # - valid UTF8 byte sequence but UTF8 flag not set
    #   (might be ascii so also need to check for hibit to make it worthwhile)
    # - UTF8 flag set but invalid UTF8 byte sequence
    # could do better here, but this'll do for now
    my $utf8 = sprintf "UTF8 %s%s", 
	utf8::is_utf8($a) ? "on" : "off",
	utf8::valid($a||'') ? "" : " but INVALID encoding";
    return "$utf8, undef" unless defined $a;
    my $is_ascii = $a =~ m/^[\000-\177]*$/;
    return sprintf "%s, %s, %d characters %d bytes",
	$utf8, $is_ascii ? "ASCII" : "non-ASCII",
	length($a), bytes::length($a);
}


sub connect_test_perf {
    my($class, $dsn,$dbuser,$dbpass, $attr) = @_;
	Carp::croak("connect_test_perf needs hash ref as fourth arg") unless ref $attr;
    # these are non standard attributes just for this special method
    my $loops ||= $attr->{dbi_loops} || 5;
    my $par   ||= $attr->{dbi_par}   || 1;	# parallelism
    my $verb  ||= $attr->{dbi_verb}  || 1;
    print "$dsn: testing $loops sets of $par connections:\n";
    require Benchmark;
    require "FileHandle.pm";	# don't let toke.c create empty FileHandle package
    $| = 1;
    my $t0 = new Benchmark;		# not currently used
    my $drh = $class->install_driver($dsn) or Carp::croak("Can't install $dsn driver\n");
    my $t1 = new Benchmark;
    my $loop;
    for $loop (1..$loops) {
	my @cons;
	print "Connecting... " if $verb;
	for (1..$par) {
	    print "$_ ";
	    push @cons, ($drh->connect($dsn,$dbuser,$dbpass)
		    or Carp::croak("Can't connect # $_: $DBI::errstr\n"));
	}
	print "\nDisconnecting...\n" if $verb;
	for (@cons) {
	    $_->disconnect or warn "bad disconnect $DBI::errstr"
	}
    }
    my $t2 = new Benchmark;
    my $td = Benchmark::timediff($t2, $t1);
    printf "Made %2d connections in %s\n", $loops*$par, Benchmark::timestr($td);
	print "\n";
    return $td;
}


# Help people doing DBI->errstr, might even document it one day
# XXX probably best moved to cheaper XS code
sub err    { $DBI::err    }
sub errstr { $DBI::errstr }


# --- Private Internal Function for Creating New DBI Handles

sub _new_handle {
    my ($class, $parent, $attr, $imp_data, $imp_class) = @_;

    Carp::croak('Usage: DBI::_new_handle'
	    .'($class_name, parent_handle, \%attr, $imp_data)'."\n"
	    .'got: ('.join(", ",$class, $parent, $attr, $imp_data).")\n")
	unless (@_ == 5	and (!$parent or ref $parent)
			and ref $attr eq 'HASH'
			and $imp_class);

    $attr->{ImplementorClass} = $imp_class
	or Carp::croak("_new_handle($class): 'ImplementorClass' attribute not given");

DBI.pm view on Meta::CPAN


  $diff = data_string_diff($a, $b);

Returns an informal description of the first character difference
between the strings. If both $a and $b contain the same sequence
of characters then data_string_diff() returns an empty string.
For example:

 Params a & b     Result
 ------------     ------
 'aaa', 'aaa'     ''
 'aaa', 'abc'     'Strings differ at index 2: a[2]=a, b[2]=b'
 'aaa', undef     'String b is undef, string a has 3 characters'
 'aaa', 'aa'      'String b truncated after 2 characters'

Unicode characters are reported in C<\x{XXXX}> format. Unicode
code points in the range U+0800 to U+08FF are unassigned and most
likely to occur due to double-encoding. Characters in this range
are reported as C<\x{08XX}='C'> where C<C> is the corresponding
latin-1 character.

The data_string_diff() function only considers logical I<characters>
and not the underlying encoding. See L</data_diff> for an alternative.

The data_string_diff() function was added in DBI 1.46.

=item C<data_diff>

  $diff = data_diff($a, $b);
  $diff = data_diff($a, $b, $logical);

Returns an informal description of the difference between two strings.
It calls L</data_string_desc> and L</data_string_diff>
and returns the combined results as a multi-line string.

For example, C<data_diff("abc", "ab\x{263a}")> will return:

  a: UTF8 off, ASCII, 3 characters 3 bytes
  b: UTF8 on, non-ASCII, 3 characters 5 bytes
  Strings differ at index 2: a[2]=c, b[2]=\x{263A}

If $a and $b are identical in both the characters they contain I<and>
their physical encoding then data_diff() returns an empty string.
If $logical is true then physical encoding differences are ignored
(but are still reported if there is a difference in the characters).

The data_diff() function was added in DBI 1.46.

=item C<neat>

  $str = neat($value);
  $str = neat($value, $maxlen);

Return a string containing a neat (and tidy) representation of the
supplied value.

Strings will be quoted, although internal quotes will I<not> be escaped.
Values known to be numeric will be unquoted. Undefined (NULL) values
will be shown as C<undef> (without quotes).

If the string is flagged internally as utf8 then double quotes will
be used, otherwise single quotes are used and unprintable characters
will be replaced by dot (.).

For result strings longer than C<$maxlen> the result string will be
truncated to C<$maxlen-4> and "C<...'>" will be appended.  If C<$maxlen> is 0
or C<undef>, it defaults to C<$DBI::neat_maxlen> which, in turn, defaults to 400.

This function is designed to format values for human consumption.
It is used internally by the DBI for L</trace> output. It should
typically I<not> be used for formatting values for database use.
(See also L</quote>.)

=item C<neat_list>

  $str = neat_list(\@listref, $maxlen, $field_sep);

Calls C<neat> on each element of the list and returns a string
containing the results joined with C<$field_sep>. C<$field_sep> defaults
to C<", ">.

=item C<looks_like_number>

  @bool = looks_like_number(@array);

Returns true for each element that looks like a number.
Returns false for each element that does not look like a number.
Returns C<undef> for each element that is undefined or empty.

=item C<hash>

  $hash_value = DBI::hash($buffer, $type);

Return a 32-bit integer 'hash' value corresponding to the contents of $buffer.
The $type parameter selects which kind of hash algorithm should be used.

For the technically curious, type 0 (which is the default if $type
isn't specified) is based on the Perl 5.1 hash except that the value
is forced to be negative (for obscure historical reasons).
Type 1 is the better "Fowler / Noll / Vo" (FNV) hash. See
L<http://www.isthe.com/chongo/tech/comp/fnv/> for more information.
Both types are implemented in C and are very fast.

This function doesn't have much to do with databases, except that
it can be handy to store hash values in a database.

=back


=head2 DBI Dynamic Attributes

Dynamic attributes are always associated with the I<last handle used>
(that handle is represented by C<$h> in the descriptions below).

Where an attribute is equivalent to a method call, then refer to
the method call for all related documentation.

Warning: these attributes are provided as a convenience but they
do have limitations. Specifically, they have a short lifespan:
because they are associated with
the last handle used, they should only be used I<immediately> after

( run in 1.109 second using v1.01-cache-2.11-cpan-bbb979687b5 )