ext/xxHash/CHANGELOG
- perf: added AVX512 support, by @gzm55
- api : new: secret generator `XXH_generateSecret()`, suggested by @koraa
- api : fix: XXH3_state_t is movable, identified by @koraa
- api : fix: state is correctly aligned in AVX mode (unlike `malloc()`), by @easyaspi314
- api : fix: streaming generated wrong values in some combination of random ingestion lengths, reported by @WayneD
- cli : fix unicode print on Windows, by @easyaspi314
- cli : `-c` can check files generated by sfv
- build: `make DISPATCH=1` generates `xxhsum` and `libxxhash` with runtime vector detection (x86/x64 only)
- install: cygwin installation support
- doc : Cryptol specification of XXH32 and XXH64, by @weaversa
parser_free|5.009005||Viu
parser_free_nexttoke_ops|5.017006||Viu
parse_stmtseq|5.013006|5.013006|x
parse_subsignature|5.031003|5.031003|x
parse_termexpr|5.013008|5.013008|x
parse_unicode_opts|5.008001||Viu
parse_uniprop_string|5.027011||Viu
PATCHLEVEL|5.003007||Viu
path_is_searchable|5.019001||Vniu
Pause|5.003007||Viu
pause|5.005000||Viu
PL_tokenbuf||5.003007|ponu
PL_top_env|5.005000||Viu
PL_toptarget|5.005000||Viu
PL_TR_SPECIAL_HANDLING_UTF8|5.031006||Viu
PL_underlying_numeric_obj|5.027009||Viu
PL_unicode|5.008001||Viu
PL_unitcheckav|5.009005||Viu
PL_unitcheckav_save|5.009005||Viu
PL_unlockhook|5.007003||Viu
PL_unsafe|5.005000||Viu
PL_UpperLatin1|5.019005||Viu
#ifndef PERL_PV_PRETTY_REGPROP
# define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_LTGT|PERL_PV_ESCAPE_RE
#endif
/* Hint: pv_escape
* Note that unicode functionality is only backported to
* those perl versions that support it. For older perl
* versions, the implementation will fall back to bytes.
*/
#ifndef pv_escape
pad_sv||5.009005|
pad_swipe|||
pad_tidy|||
pad_undef|||
parse_body|||
parse_unicode_opts|||
parser_dup|||
parser_free|||
path_is_absolute|||n
peep|||
pending_Slabs_to_ro|||
pad_sv|||
pad_swipe|||
pad_tidy|||
pad_undef|||
parse_body|||
parse_unicode_opts|||
path_is_absolute|||
peep|||
pending_ident|||
perl_alloc_using|||n
perl_alloc|||n
# Enable utf-8 encoding so we do not get Wide character in print
# warnings when reporting test failures
use open qw{:encoding(UTF-8) :std};
my $test_root = "corpus.tmp";
my $unicode_dir = "\x{30c6}\x{30b9}\x{30c8}\x{30c6}\x{3099}\x{30a3}\x{30ec}\x{30af}\x{30c8}\x{30ea}";
plan skip_all => "Skipped: $^O does not have proper utf-8 file system support"
if ($^O =~ /MSWin32|cygwin|dos|os2/);
# Create test files
mkdir $test_root
or die "Unable to create directory $test_root: $!"
unless -d $test_root;
mkdir "$test_root/$unicode_dir"
or die "Unable to create directory $test_root/$unicode_dir: $!"
unless -d "$test_root/$unicode_dir";
# Cleanup temporarily created files and directories
END {
use File::Path 2.06_06 qw(remove_tree);
remove_tree($test_root) or die "Unable to remove $test_root" if -d $test_root;
my $utf8_encoded = encode_utf8($utf8);
my $non_utf8_decoded = decode_utf8($non_utf8, FB_CROAK | LEAVE_SRC);
plan tests => 3;
like $utf8 => qr/\/$unicode_dir$/, "$test found correct dir";
is $utf8_encoded => $non_utf8, "$test encoded utf8 dir matches non-utf8";
is $utf8 => $non_utf8_decoded, "$test utf8 dir matches decoded non-utf8";
}
plan tests => 9;
use Cwd;
my $currentdir = getcwd();
# Test getcwd, cwd, fastgetcwd, fastcwd
chdir("$test_root/$unicode_dir") or die "Couldn't chdir to $test_root/$unicode_dir: $!";
for my $test (qw(getcwd cwd fastgetcwd fastcwd)) {
subtest "utf8$test" => sub {
# To keep results in
my $utf8;
my $non_utf8;
{
use Cwd qw(abs_path realpath fast_abs_path fast_realpath);
$non_utf8 = (\&{$test})->("$test_root/$unicode_dir");
}
{
use Cwd::utf8 qw(abs_path realpath fast_abs_path fast_realpath);
$utf8 = (\&{$test})->("$test_root/$unicode_dir");
}
check_dirs($test, $utf8, $non_utf8);
}
}
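The behaviour exercised above can be pictured with a small standalone sketch (an illustration, not part of the test, and assuming a UTF-8 file system): plain Cwd returns the directory name as bytes, and decoding those bytes gives the character string that Cwd::utf8 is expected to return directly.
  use strict;
  use warnings;
  use Cwd qw(getcwd);
  use Encode qw(decode_utf8);

  my $bytes = getcwd();              # plain Cwd: raw bytes from the OS
  my $chars = decode_utf8($bytes);   # roughly what Cwd::utf8 hands back

  binmode STDOUT, ':encoding(UTF-8)';
  print "byte length:      ", length($bytes), "\n";
  print "character length: ", length($chars), "\n";
  print "decoded cwd:      $chars\n";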
lib/Cyrillic.pm
nurse, History of Japanese EUC 22:00
http://d.hatena.ne.jp/nurse/20090308/1236517235
Mike Whitaker, Perl And Unicode
http://www.slideshare.net/Penfold/perl-and-unicode
Ricardo Signes, Perl 5.14 for Pragmatists
http://www.slideshare.net/rjbs/perl-514-8809465
Ricardo Signes, What's New in Perl? v5.10 - v5.16 #'
lib/Cyrillic.pm
daily dayflower, 2008-06-25 perluniadvice
http://d.hatena.ne.jp/dayflower/20080625/1214374293
Unicode issues in Perl
http://www.i-programmer.info/programming/other-languages/1973-unicode-issues-in-perl.html
Jesse Vincent, Compatibility is a virtue
http://www.nntp.perl.org/group/perl.perl5.porters/2010/05/msg159825.html
Tokyo-pm archive
parse_fullstmt||5.013005|
parse_label||5.013007|
parse_listexpr||5.013008|
parse_stmtseq||5.013006|
parse_termexpr||5.013008|
parse_unicode_opts|||
parser_dup|||
parser_free|||
path_is_absolute|||n
peep|||
pending_Slabs_to_ro|||
#ifndef PERL_PV_PRETTY_REGPROP
# define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_LTGT|PERL_PV_ESCAPE_RE
#endif
/* Hint: pv_escape
* Note that unicode functionality is only backported to
* those perl versions that support it. For older perl
* versions, the implementation will fall back to bytes.
*/
#ifndef pv_escape
t/20entities.t
__END__
# TODO
# Check that strings are encoded in UTF-8
my %unicode = (
"\xE4" => "\xC3\xA4"
);
foreach my $s (keys %unicode) {
$e = $class->new( content => $s );
is ( $e->json, '{"string":"'. $unicode{$s} . '"}', "UTF-8" );
}
lib/DB/Object/SQLite.pm
# This may be handy, but with performance penalty. See above for details.
sqlite_allow_multiple_statements => 1,
# If you set this to true, DBD::SQLite tries to see if the bind values are numbers or
# not, and does not quote if they are numbers.
sqlite_see_if_its_a_number => 1,
sqlite_unicode => 1,
# Returns an unprepared part of the statement you pass to "prepare". Typically this
# contains nothing but white spaces after a semicolon.
sqlite_unprepared_statements => 0,
# If you set this to true, DBD::SQLite tries to issue a "begin immediate transaction"
# (instead of "begin transaction") when necessary.
lib/DB/Object/SQLite.pm
sub connect
{
my $that = shift( @_ );
my $param = $that->_connection_params2hash( @_ ) || return;
$param->{driver} = 'SQLite';
$param->{sqlite_unicode} = 1;
return( $that->SUPER::connect( $param ) );
}
# NOTE: sub constant_to_datatype is inherited
lib/DB/Object/SQLite.pm
sub _check_default_option
{
my $self = shift( @_ );
my $opts = $self->_get_args_as_hash( @_ );
return( $self->error( "Provided option is not a hash reference." ) ) if( !$self->_is_hash( $opts => 'strict' ) );
$opts->{sqlite_unicode} = 1 if( !CORE::exists( $opts->{sqlite_unicode} ) );
return( $opts );
}
sub _connection_options
{
lib/DB/Object/SQLite.pm
# Even though login, password, server and host are not used, I hesitated but decided to accept them and simply ignore them
# Or should I issue an error when they are provided?
my $core = [qw( db login passwd host port driver database server opt uri debug cache_connections cache_table unknown_field )];
my @sqlite_params = grep( /^sqlite_/, keys( %$param ) );
# See DBD::SQLite for the list of valid parameters
# E.g.: sqlite_open_flags sqlite_busy_timeout sqlite_use_immediate_transaction sqlite_see_if_its_a_number sqlite_allow_multiple_statements sqlite_unprepared_statements sqlite_unicode sqlite_allow_multiple_statements sqlite_use_immediate_transacti...
push( @$core, @sqlite_params );
return( $core );
}
sub _dbi_connect
lib/DB/Object/SQLite.pm
=item * C<sqlite_see_if_its_a_number>
Can be changed.
=item * C<sqlite_unicode>
Can be changed.
=item * C<sqlite_unprepared_statements>
lib/DB/Object/SQLite.pm
=head2 connect
Same as L<DB::Object/connect>, only specific to SQLite.
It sets C<sqlite_unicode> to a true value in the connection parameters returned by L</_connection_params2hash>.
See L</_connection_params2hash>.
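For reference, the equivalent plain DBI connection with C<sqlite_unicode> forced on might look like the following sketch (the database file name is invented):
  use strict;
  use warnings;
  use DBI;

  my $dbh = DBI->connect(
      'dbi:SQLite:dbname=example.sqlite', '', '',
      {
          RaiseError     => 1,
          sqlite_unicode => 1,   # decode TEXT columns to Perl characters
      },
  );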
=head2 copy
lib/DB/Object/SQLite.pm
=head2 _check_default_option
Provided with a hash or hash reference of options, this will check it and set some default values.
The only default property this sets is C<sqlite_unicode> to true.
It returns the hash reference of options.
=head2 _connection_options
lib/DBD/ADO.pm
Win32::OLE->Option( CP => Win32::OLE::CP_UTF8 );
More detailed notes can be found at
http://purl.net/stefan_ram/pub/perl_unicode_en
=head2 ADO providers
=over
parse_fullstmt||5.013005|
parse_label||5.013007|
parse_listexpr||5.013008|
parse_stmtseq||5.013006|
parse_termexpr||5.013008|
parse_unicode_opts|||
parser_dup|||
parser_free|||
path_is_absolute|||n
peep|||
pending_Slabs_to_ro|||
#ifndef PERL_PV_PRETTY_REGPROP
# define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_LTGT|PERL_PV_ESCAPE_RE
#endif
/* Hint: pv_escape
* Note that unicode functionality is only backported to
* those perl versions that support it. For older perl
* versions, the implementation will fall back to bytes.
*/
#ifndef pv_escape
t/75-utf8.t
$cursor->finish;
is( $row->[0], 3 );
is( $row->[1], 'VærÑà r' );
is( $row->[2], 'Tæst' );
is( $row->[3], 'â¬Ã·â', 'inline unicode blob' );
#
# Insert UTF8, binding
#
ok( $dbh->do(
t/75-utf8.t
$cursor->finish;
is( $row->[0], 4 );
is( $row->[1], 'VærÑà r' );
is( $row->[2], 'Tæst' );
is( $row->[3], 'â¬Ã·â', 'bound unicode blob' );
#
# Now turn off unicode support. things we fetch should not be flagged as
# unicode anymore
#
$dbh->{ib_enable_utf8} = 0;
ok( !$dbh->{ib_enable_utf8}, 'Turn off ib_enable_utf8' );
t/75-utf8.t
ok( $cursor->execute(4) );
$row = $cursor->fetchrow_arrayref;
$cursor->finish;
is( $row->[0], 4 );
is( $row->[1], encode_utf8('VærÑà r'), 'non-unicode varchar' );
is( $row->[2], encode_utf8('Tæst'), 'non-unicode char' );
is( $row->[3], encode_utf8('â¬Ã·â'), 'non-unicode blob' );
#
# ... and drop it.
#
ok($dbh->do("DROP TABLE $table"), "DROP TABLE '$table'");
IngresII.pm
charset. For example, for polish Windows you need to encode it to
cp852 encoding.
=head1 UNICODE EXAMPLES
You want to store or retrieve a Unicode string from an Ingres database? Like
with everything in Perl, there's more than one way to do it (TMTOWTDI).
Here are some examples:
# Example number one, it uses NVARCHAR, and assumes that II_CHARSET is set
# to UTF8
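Purely as an illustrative sketch (not the module's own example; the DSN, table and column names are invented, and depending on your II_CHARSET additional encode/decode steps may be needed), storing and reading back a Unicode value through placeholders could look like this:
  use strict;
  use warnings;
  use utf8;
  use DBI;

  # Hypothetical table: CREATE TABLE demo (txt NVARCHAR(32))
  my $dbh = DBI->connect('dbi:IngresII:mydb', '', '', { RaiseError => 1 });

  my $string = "Grüße";    # a Perl character string
  $dbh->do('INSERT INTO demo (txt) VALUES (?)', undef, $string);

  my ($back) = $dbh->selectrow_array('SELECT txt FROM demo');
  binmode STDOUT, ':encoding(UTF-8)';
  print "round-tripped: $back\n";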
- Prevent core dump when checking $dbh->{standard_conforming_strings}
on older servers.
[Greg Sabino Mullane]
- Skip unicode tests if server is set to 'LATIN1'
[Greg Sabino Mullane]
Version 2.10.5 (released September 16, 2008)
my $err;
my $dbh = eval { DBI->connect(@_) };
if ( $dbh ) {
my ($current_charset, $current_collation) = $dbh->selectrow_array('SELECT @@character_set_database, @@collation_database');
my $expected_charset = $dbh->selectrow_array("SHOW CHARSET LIKE 'utf8mb4'") ? 'utf8mb4' : 'utf8';
my $expected_collation = "${expected_charset}_unicode_ci";
if ($current_charset ne $expected_charset) {
$err = "Database charset is not $expected_charset, but $current_charset";
} elsif ($current_collation ne $expected_collation) {
$err = "Database collation is not $expected_collation, but $current_collation";
}
$sth;
}
sub type_info_all {
my ($dbh) = @_;
my $res = DBD::MaxDB::db::_isunicode($dbh);
if ($res) {
require DBD::MaxDB::TypeInfoUnicode;
return $DBD::MaxDB::TypeInfoUnicode::type_info_all;
} else {
require DBD::MaxDB::TypeInfoAscii;
=item C<maxdb_sqlmode (string)>
Gets/Sets the SQL mode of the current connection. Possible values are
C<ORACLE | INTERNAL>.
=item C<maxdb_unicode (boolean, read-only)>
Indicates whether the current connection supports Unicode (true) or not (false).
=back
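A small, hypothetical sketch using the two attributes above (DSN and credentials are invented):
  use strict;
  use warnings;
  use DBI;

  my $dbh = DBI->connect('dbi:MaxDB:localhost/MYDB', 'user', 'secret',
                         { RaiseError => 1 });

  # maxdb_sqlmode can be changed at run time ...
  $dbh->{maxdb_sqlmode} = 'ORACLE';

  # ... while maxdb_unicode is read-only and only reports a capability.
  print 'Unicode support: ', ($dbh->{maxdb_unicode} ? 'yes' : 'no'), "\n";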
=head2 Statement Handle Attributes
or die "Can't execute statement $DBI::err $DBI::errstr\n";
...
=head1 UNICODE
DBD::MaxDB supports Unicode. Perl's internal Unicode format is UTF-8,
but MaxDB uses UCS-2. Therefore, support is limited to UTF-8 characters
that are also contained in the UCS-2 standard.
=head2 Perl and Unicode
Perl began implementing Unicode with version 5.6, but if you plan to use Unicode
it is strongly recommended to use Perl 5.8.2 or later. Details about using
Unicode in Perl can be found in the Perl documentation:
perldoc perluniintro
perldoc perlunicode
=head2 MaxDB and Unicode
MaxDB supports the code attribute Unicode for the data type CHAR and is able to
display various presentation codes in Unicode format. As well as storing data in Unicode,
#define SQL_INTERVAL_MINUTE_TO_SECOND (-92)
#endif /* ODBCVER >= 0x0300 */
/*
* SQL unicode data types
*/
#if (ODBCVER <= 0x0300)
/* These definitions are historical and obsolete */
#define SQL_UNICODE (-95)
#define SQL_UNICODE_VARCHAR (-96)
diags => \@EXPORT_DIAGS,
taf => \@EXPORT_TAF);
sub parse_trace_flag {
my ($class, $name) = @_;
return 0x02_00_00_00 if $name eq 'odbcunicode';
return 0x04_00_00_00 if $name eq 'odbcconnection';
return DBI::parse_trace_flag($class, $name);
}
sub parse_trace_flags {
odbc_describe_parameters => undef,
odbc_SQL_ROWSET_SIZE => undef,
odbc_SQL_DRIVER_ODBC_VER => undef,
odbc_cursortype => undef,
odbc_query_timeout => undef, # sth and dbh
odbc_has_unicode => undef,
odbc_out_connect_string => undef,
odbc_version => undef,
odbc_err_handler => undef,
odbc_putdata_start => undef, # sth and dbh
odbc_column_display_size => undef, # sth and dbh
This documentation refers to DBD::ODBC version 1.61.
=head1 WARNING
This version of DBD::ODBC contains a significant fix to unicode when
inserting into CHAR/VARCHAR columns and it is a change in behaviour
from 1.45. The change B<only> applies to unicode builds of DBD::ODBC
(the default on Windows but you can build it for unicode on unix too)
and char/varchar columns and not nchar/nvarchar columns.
Prior to this release, when you were using the unicode build of
DBD::ODBC and inserted data into CHAR/VARCHAR columns using
parameters, DBD::ODBC did this:
1 if you set odbc_describe_parameters to 0, (thus preventing DBD::ODBC
from calling SQLDescribeParam) parameters for CHAR/VARCHAR columns
type. This usually returns SQL_CHAR or SQL_VARCHAR for CHAR/VARCHAR
columns unsurprisingly. The parameter was then bound as SQL_VARCHAR.
Items 1 to 4 still apply. 5 now has a different behaviour. In this
release, DBD::ODBC now looks at your bound data first before using the
type returned by SQLDescribeParam. If your data looks like unicode
(i.e., SvUTF8() is true) it now binds the parameter as SQL_WVARCHAR.
What might this mean to you?
If you had Perl scalars that were bound to CHAR/VARCHAR columns in an
insert/update/delete and those scalars contained unicode, DBD::ODBC
would actually pass the individual octets in your scalar not
characters. For instance, if you had the Perl scalar "\x{20ac}" (the
Euro unicode character) and you bound it to a CHAR/VARCHAR, DBD::ODBC
would pass 0xe2, 0x82, 0xac as separate characters because those bytes
were Perl's UTF-8 encoding of a euro. These would probably be
interpreted by your database engine as 3 characters in its current
codepage. If you queried your database to find the length of the data
inserted you'd probably get back 3, not 1.
statement, it would bind the column as SQL_WCHAR and you'd get back 3
characters with the utf8 flag on (what those characters were depends
on how your database or driver translates code page characters to wide
characters).
What should happen now is that if your bound parameters are unicode,
DBD::ODBC will bind them as wide characters (unicode) and your driver
or database will attempt to convert them into the code page it is
using. This means so long as your database can store the data you are
inserting, when you read it back you should get what you inserted.
=head1 SYNOPSIS
Older versions of DBD::ODBC assumed that the parameter binding type
was 12 (C<SQL_VARCHAR>). Newer versions always attempt to call
C<SQLDescribeParam> to find the parameter types but if
C<SQLDescribeParam> is unavailable DBD::ODBC falls back to a default
bind type. The internal default bind type is C<SQL_VARCHAR> (for
non-unicode build) and C<SQL_WVARCHAR> or C<SQL_VARCHAR> (for a
unicode build depending on whether the parameter is unicode or
not). If you set C<odbc_default_bind_type> to a value other than 0 you
override the internal default.
B<N.B> If you call the C<bind_param> method with a SQL type this
overrides everything else above.
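For example, a sketch of forcing the bind type explicitly (the DSN, table and column are invented):
  use strict;
  use warnings;
  use utf8;
  use DBI qw(:sql_types);

  my $dbh = DBI->connect('dbi:ODBC:DSN=mydsn', 'user', 'pass',
                         { RaiseError => 1 });

  my $sth = $dbh->prepare('INSERT INTO notes (body) VALUES (?)');

  # An explicit SQL type on bind_param overrides both SQLDescribeParam
  # and the internal defaults described above.
  $sth->bind_param(1, "\x{20ac}100", SQL_WVARCHAR);
  $sth->execute;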
describe the parameters accurately (MS SQL Server sometimes does this
with some SQL like I<select myfunc(?) where 1 = 1>). Setting
C<odbc_force_bind_type> to C<SQL_VARCHAR> will force DBD::ODBC to bind
all the parameters as C<SQL_VARCHAR> and ignore SQLDescribeParam.
Bear in mind that if you are inserting unicode data you probably want
to use C<SQL_WVARCHAR>/C<SQL_WCHAR>/C<SQL_WLONGVARCHAR> and not
C<SQL_VARCHAR>.
As this attribute was created to work around buggy ODBC Drivers which
support SQLDescribeParam but describe the parameters incorrectly you
Set this flag to treat all strings returned from the ODBC driver
(except columns described as SQL_BINARY or SQL_TIMESTAMP and its
variations) as UTF-8 encoded. Some ODBC drivers (like Aster and maybe
PostgreSQL) return UTF-8 encoded data but do not support the SQLxxxW
unicode API. Enabling this flag will cause DBD::ODBC to treat driver
returned data as UTF-8 encoded and it will be marked as such in Perl.
Do not confuse this with DBD::ODBC's unicode support. The
C<odbc_utf8_on> attribute only applies to non-unicode enabled builds
of DBD::ODBC.
=head3 odbc_describe_parameters
Defaults to on. When set this allows DBD::ODBC to call SQLDescribeParam
(if the driver supports it) to retrieve information about any
parameters.
When off/false DBD::ODBC will not call SQLDescribeParam and defaults
to binding parameters as SQL_CHAR/SQL_WCHAR depending on the build
type and whether your data is unicode or not.
You do not have to disable odbc_describe_parameters just because your
driver does not support SQLDescribeParam as DBD::ODBC will work this
out at the start via SQLGetFunctions.
and
$dbh->{odbc_exec_direct} = 1;
B<NOTE:> Even if you build DBD::ODBC with unicode support you still
cannot pass unicode strings to the prepare method if you also set
odbc_exec_direct. This is a restriction in this attribute which is
unavoidable.
=head3 odbc_SQL_DRIVER_ODBC_VER
See F<t/20SqlServer.t> for an example.
In versions of SQL Server 2005 and later see "Multiple Active Statements (MAS)" in the DBD::ODBC::FAQ instead of using this attribute.
=head3 odbc_has_unicode
A read-only attribute signifying whether DBD::ODBC was built with the
C macro WITH_UNICODE or not. A value of 1 indicates DBD::ODBC was built
with WITH_UNICODE else the value returned is 0.
Building WITH_UNICODE affects columns and parameters which are
SQL_C_WCHAR, SQL_WCHAR, SQL_WVARCHAR, and SQL_WLONGVARCHAR, SQL,
the connect method and a lot more. See L</Unicode>.
When odbc_has_unicode is 1, DBD::ODBC will:
=over
=item bind all string columns as wide characters (SQL_Wxxx)
This means that UNICODE data stored in these columns will be returned
to Perl correctly as unicode (i.e., encoded in UTF-8 and the UTF-8 flag set).
=item bind parameters the database declares as wide characters or unicode parameters as SQL_Wxxx
Parameters bound where the database declares the parameter as being a
wide character, or where the parameter data is unicode, or where the
parameter type is explicitly set to a wide type (e.g., SQL_Wxxx) are bound
as wide characters in the ODBC API and DBD::ODBC encodes the perl parameters
as UTF-16 before passing them to the driver.
=item SQL
=back
NOTE: You will need at least Perl 5.8.1 to use UNICODE with DBD::ODBC.
NOTE: Binding of unicode output parameters is coded but untested.
NOTE: When building DBD::ODBC on Windows ($^O eq 'MSWin32') the
WITH_UNICODE macro is automatically added. To disable specify -nou as
an argument to Makefile.PL (e.g. C<perl Makefile.PL -nou>). On non-Windows
platforms the WITH_UNICODE macro is B<not> enabled by default and to enable
export DBD_ODBC_UNICODE=1
cpanm DBD::ODBC
UNICODE support in ODBC Drivers differs considerably. Please read the
README.unicode file for further details.
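A minimal sketch of checking this attribute at run time (DSN and credentials are invented):
  use strict;
  use warnings;
  use DBI;

  my $dbh = DBI->connect('dbi:ODBC:DSN=mydsn', 'user', 'pass',
                         { RaiseError => 1 });

  # odbc_has_unicode is read-only; it reports how DBD::ODBC was built.
  print 'Built WITH_UNICODE: ', ($dbh->{odbc_has_unicode} ? 'yes' : 'no'), "\n";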
=head3 odbc_out_connect_string
After calling the connect method this will be the ODBC driver's
out connection string - see documentation on SQLDriverConnect.
The type the lob is retrieved as may be overridden in C<%attr> using
C<TYPE =E<gt> sql_type>. C<%attr> is optional and if omitted defaults
to SQL_C_BINARY for binary columns and SQL_C_CHAR/SQL_C_WCHAR for
other column types depending on whether DBD::ODBC is built with
unicode support. C<$chrs_or_bytes_read> will be the bytes read when
the column types SQL_C_CHAR or SQL_C_BINARY are used and characters
read if the column type is SQL_C_WCHAR.
When built with unicode support C<$length> specifies the amount of
buffer space to be used when retrieving the lob data but as it is
returned as SQLWCHAR characters this means you at most retrieve
C<$length/2> characters. When those retrieved characters are encoded
in UTF-8 for Perl, the C<$lob> scalar may need to be larger than
C<$length> so DBD::ODBC grows it appropriately.
DBD::ODBC is 'SQL' which DBD::ODBC supports by outputting the SQL
strings (after modification) passed to the prepare and do methods.
From DBI 1.617 DBI also defines ENC (encoding), CON (connection) TXN
(transaction) and DBD (DBD only) trace flags. DBI's ENC and CON trace
flags are synonymous with DBD::ODBC's odbcunicode and odbcconnection
trace flags though I may remove the DBD::ODBC ones in the
future. DBI's DBD trace flag allows output of only DBD::ODBC trace
messages without DBI's trace messages.
Currently DBD::ODBC supports two private trace flags. The
'odbcunicode' flag traces some unicode operations and the
'odbcconnection' flag traces the connect process.
To enable tracing of particular flags you use:
$h->trace($h->parse_trace_flags('SQL|odbcconnection'));
$h->trace($h->parse_trace_flags('1|odbcunicode'));
In the first case 'SQL' and 'odbcconnection' tracing is enabled on
$h. In the second case trace level 1 is set and 'odbcunicode' tracing
is enabled.
If you want to enable a DBD::ODBC private trace flag before connecting
you need to do something like:
DBI->trace(DBD::ODBC->parse_trace_flag('odbcconnection'));
or
use DBD::ODBC;
DBI->trace(DBD::ODBC->parse_trace_flags('odbcconnection|odbcunicode'));
or
DBI_TRACE=odbcconnection|odbcunicode perl myscript.pl
From DBI 1.617 you can output only DBD::ODBC trace messages using
DBI_TRACE=DBD perl myscript.pl
unixODBC will happily recognise ODBC drivers which only have the ANSI
versions of the ODBC API and those that have the wide versions
too.
unixODBC will allow an ANSI application to work with a unicode
ODBC driver and vice versa (although in the latter case you obviously
cannot actually use unicode).
unixODBC does not prevent you sending UTF-8 in the ANSI versions of
the ODBC APIs but whether that is understood by your ODBC driver is
another matter.
unixODBC differs in only one way from the Microsoft ODBC driver in
terms of unicode support in that it avoids unnecessary translations
between single byte and double byte characters when an ANSI
application is using a unicode-aware ODBC driver by requiring unicode
applications to signal their intent by calling SQLDriverConnectW
first. On Windows, the ODBC driver manager always uses the wide
versions of the ODBC API in ODBC drivers which provide the wide
versions regardless of what the application really needs and this
results in a lot of unnecessary character translations when you have
an ANSI application and a unicode ODBC driver.
=item iODBC
The wide character versions expect and return wchar_t types.
=back
DBD::ODBC has gone with unixODBC so you cannot use iODBC with a
unicode build of DBD::ODBC. However, some ODBC drivers support UTF-8
(although how they do this with SQLGetData reliably I don't know)
and so you should be able to use those with DBD::ODBC not built for
unicode.
=head3 Enabling and Disabling Unicode support
On Windows Unicode support is enabled by default and to disable it
you will need to specify C<-nou> to F<Makefile.PL> to get back to the
perl Makefile.PL -u
=head3 Unicode - What is supported?
As of version 1.17 DBD::ODBC has the following unicode support:
=over
=item SQL (introduced in 1.16_2)
Unicode strings in calls to the C<prepare> and C<do> methods are
supported so long as the C<odbc_execdirect> attribute is not used.
=item unicode connection strings (introduced in 1.16_2)
Unicode connection strings are supported but you will need a DBI
post 1.607 for that.
=item column names
As of DBD::ODBC 1.32_3 meta data calls accept Unicode strings.
=back
Since version 1.16_4, the default parameter bind type is SQL_WVARCHAR
for unicode builds of DBD::ODBC. This only affects ODBC drivers which
do not support SQLDescribeParam and only then if you do not
specifically set a SQL type on the bind_param method call.
The above Unicode support has been tested with the SQL Server, Oracle
9.2+ and Postgres drivers on Windows and various Easysoft ODBC drivers
on UNIX.
=head3 Unicode - What is not supported?
You cannot use unicode parameter names e.g.,
select * from table where column = :unicode_param_name
You cannot use unicode strings in calls to prepare if you set the
odbc_execdirect attribute.
You cannot use the iODBC driver manager with DBD::ODBC built for
unicode.
=head3 Unicode - Caveats
For Unicode support on any platform in Perl you will need at least
Perl 5.8.1 - sorry but this is the way it is with Perl.
(http://www.unixodbc.org) with Unicode support and it was built with
defaults which set WCHAR as 2 bytes.
I believe that the iODBC driver manager expects wide characters to be
wchar_t types (which are usually 4 bytes) and hence DBD::ODBC will not
work with iODBC when built for unicode.
The ODBC Driver must expect Unicode data specified in SQLBindParameter
and SQLBindCol to be UTF-16 in local endianness. Similarly, in calls to
SQLPrepareW, SQLDescribeColW and SQLDriverConnectW.
patches welcome.
=head3 Unicode implementation in DBD::ODBC
DBD::ODBC uses the wide character versions of the ODBC API and the
SQL_WCHAR ODBC type to support unicode in Perl.
Wide characters returned from the ODBC driver will be converted to
UTF-8 and the perl scalars will have the utf8 flag set (by using
sv_utf8_decode).
from table" with a single Unicode character above 0xFFFF may
return 2 and not 1 so you cannot use database functions on that
data like upper/lower/length etc but you can at least save the data in
your database and get it back.
When built for unicode, DBD::ODBC will always call SQLDriverConnectW
(and not SQLDriverConnect) even if a) your connection string is not
unicode b) you have not got a DBI later than 1.607, because unixODBC
requires SQLDriverConnectW to be called if you want to call other
unicode ODBC APIs later. As a result, if you build for unicode and
pass ASCII strings to the connect method they will be converted to
UTF-16 and passed to SQLDriverConnectW. This should make no real
difference to perl not using unicode connection strings.
You will need a DBI later than 1.607 to support unicode connection
strings because until post 1.607 there was no way for DBI to pass
unicode strings to the DBD.
=head3 Unicode and Oracle
You have to set the environment variables C<NLS_NCHAR=AL32UTF8> and
C<NLS_LANG=AMERICAN_AMERICA.AL32UTF8> (or any other language setting
modification except for the Oracle driver you will need to set your
NLS_LANG as mentioned above.
=head3 Unicode and other ODBC drivers
If you have a unicode-enabled ODBC driver and it works with DBD::ODBC
let me know and I will include it here.
=head2 ODBC Support in ODBC Drivers
=head3 Drivers without SQLDescribeParam
DBD::ODBC uses the C<SQLDescribeParam> API when parameters are bound
to your SQL to find the types of the parameters. If the ODBC driver
does not support C<SQLDescribeParam>, DBD::ODBC assumes the parameters
are C<SQL_VARCHAR> or C<SQL_WVARCHAR> types (depending on whether
DBD::ODBC is built for unicode or not and whether your parameter is
unicode data). In any case, if you bind a parameter and specify a SQL
type this overrides any type DBD::ODBC would choose.
For ODBC drivers which do not support C<SQLDescribeParam> the default
behavior in DBD::ODBC may not be what you want. To change the default
parameter bind type set L</odbc_default_bind_type>. If, after that you
L<http://www.easysoft.com/support/kb/kb01043.html>
Some Common Unicode Problems and Solutions using Perl DBD::ODBC and MS SQL Server
L<http://www.easysoft.com/developer/languages/perl/sql-server-unicode.html>
and a version possibly kept more up to date:
L<https://github.com/mjegh/dbd_odbc_sql_server_unicode/blob/master/common_problems.pod>
How do I use SQL Server Query Notifications from Linux and UNIX?
L<http://www.easysoft.com/support/kb/kb01069.html>
lib/DBD/Oracle.pm
ora_lob_trim
ora_lob_length
ora_lob_chunk_size
ora_lob_is_init
ora_nls_parameters
ora_can_unicode
ora_can_taf
ora_db_startup
ora_db_shutdown
/;
lib/DBD/Oracle.pm
# return copy of params to protect against accidental editing
my %nls = %{$dbh->{ora_nls_parameters}};
return \%nls;
}
sub ora_can_unicode {
my $dbh = shift;
my $refresh = shift;
# 0 = No Unicode support.
# 1 = National character set is Unicode-based.
# 2 = Database character set is Unicode-based.
# 3 = Both character sets are Unicode-based.
return $dbh->{ora_can_unicode}
if defined $dbh->{ora_can_unicode} && !$refresh;
my $nls = $dbh->ora_nls_parameters($refresh);
$dbh->{ora_can_unicode} = 0;
$dbh->{ora_can_unicode} += 1 if $nls->{NLS_NCHAR_CHARACTERSET} =~ m/UTF/;
$dbh->{ora_can_unicode} += 2 if $nls->{NLS_CHARACTERSET} =~ m/UTF/;
return $dbh->{ora_can_unicode};
}
} # end of package DBD::Oracle::db
lib/DBD/Oracle.pm
It also has the effect of disabling the 'quick FETCH' of attribute values from the handle's attribute cache. So all attribute values are handled by the driver's own FETCH method. This makes them slightly slower but is useful for special-purpose drivers...
=head1 ORACLE-SPECIFIC DATABASE HANDLE METHODS
=head2 B<ora_can_unicode ( [ $refresh ] )>
Returns a number indicating whether either of the database character sets
is a Unicode encoding. Calls ora_nls_parameters() and passes the optional
$refresh parameter to it.
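As an illustration (connection details are invented), the return value can be interpreted like this:
  use strict;
  use warnings;
  use DBI;

  my $dbh = DBI->connect('dbi:Oracle:ORCL', 'scott', 'tiger',
                         { RaiseError => 1 });

  my %meaning = (
      0 => 'no Unicode-based character set',
      1 => 'national character set is Unicode-based',
      2 => 'database character set is Unicode-based',
      3 => 'both character sets are Unicode-based',
  );
  my $level = $dbh->ora_can_unicode;
  print "This database: $meaning{$level}\n";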
lib/DBD/Oracle.pm
In this section we'll discuss "Perl and Unicode", then "Oracle and
Unicode", and finally "DBD::Oracle and Unicode".
Information about Unicode in general can be found at:
L<http://www.unicode.org/>. It is well worth reading because there are
many misconceptions about Unicode and you may be holding some of them.
=head2 Perl and Unicode
Perl began implementing Unicode with version 5.6, but the implementation
did not mature until version 5.8 and later. If you plan to use Unicode
you are I<strongly> urged to use Perl 5.8.2 or later and to I<carefully> read
the Perl documentation on Unicode:
perldoc perluniintro # in Perl 5.8 or later
perldoc perlunicode
And then read it again.
Perl's internal Unicode format is UTF-8
which corresponds to the Oracle character set called AL32UTF8.
lib/DBD/Oracle.pm
words characters beyond the Unicode BMP (Basic Multilingual Plane).
That's because the character set that Oracle calls "UTF8" doesn't
conform to the UTF-8 standard in its handling of surrogate characters.
Technically the encoding that Oracle calls "UTF8" is known as "CESU-8".
Here are a couple of extracts from L<http://www.unicode.org/reports/tr26/>:
CESU-8 is useful in 8-bit processing environments where binary
collation with UTF-16 is required. It is designed and recommended
for use only within products requiring this UTF-16 binary collation
equivalence. It is not intended nor recommended for open interchange.
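To make the CESU-8 difference concrete, here is a small standalone sketch (not from DBD::Oracle) that builds the CESU-8 form of a supplementary character by giving each UTF-16 surrogate the ordinary three-byte UTF-8 bit pattern, and compares it with standard UTF-8:
  use strict;
  use warnings;
  use Encode qw(encode);

  my $cp = 0x1F600;                      # a character outside the BMP

  # Standard UTF-8: one four-byte sequence.
  my $utf8 = encode('UTF-8', chr $cp);

  # CESU-8: split the code point into a UTF-16 surrogate pair, then encode
  # each surrogate with the usual three-byte UTF-8 pattern.
  my $v    = $cp - 0x10000;
  my @unit = (0xD800 + ($v >> 10), 0xDC00 + ($v & 0x3FF));
  my $cesu = join '', map {
        chr(0xE0 |  ($_ >> 12))
      . chr(0x80 | (($_ >> 6) & 0x3F))
      . chr(0x80 |  ($_ & 0x3F))
  } @unit;

  my @utf8_hex = map { sprintf '%02X', ord } split //, $utf8;
  my @cesu_hex = map { sprintf '%02X', ord } split //, $cesu;
  printf "UTF-8 : %s (%d bytes)\n", "@utf8_hex", scalar @utf8_hex;
  printf "CESU-8: %s (%d bytes)\n", "@cesu_hex", scalar @cesu_hex;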
example/11_read_using_Locale-Maketext.pl
# and has 1 utf-8 po file.
Locale::Maketext::Lexicon->import({
de => [
Gettext => "$path/$table",
],
_decode => 1, # unicode mode
});
}
use Carp qw(croak);
use Tie::Sub (); # allows writing a subroutine call as a hash fetch
t/01_simple.t
my $dbh = create_dbh();
{
no utf8;
$dbh->do(q{INSERT INTO member (id, name) VALUES (?, ?)}, {}, 3, 'ããããã');
}
$dbh->{sqlite_unicode} = 1;
my ($name) = $dbh->selectrow_array(q{SELECT name FROM member WHERE id=3});
is($name, 'ããããã');
};
subtest 'can_ok' => sub {
my $dbh = create_dbh();
Redbase/DataStream.pm
# This function returns a string compatible with Java's Input/OutputStream
# readUTF method
###############################################################################
sub _writeUTF($)
{
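# Assumption: utf8() here comes from Unicode::String (note the ->latin1()
# call in _readUTF below).  Java's "modified UTF-8" never emits a raw NUL
# byte: U+0000 is written as the overlong two-byte sequence 0xC0 0x80,
# which is exactly the substitution the loop below performs.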
my $unicode_string = utf8(shift());
while ((my $pos = index($unicode_string, "\000")) > -1)
{
$unicode_string = substr($unicode_string, 0, $pos) . chr(192) . chr(128) . substr($unicode_string, $pos + 1);
}
return $unicode_string;
}
###############################################################################
# This method writes binary char compatible with Java
###############################################################################
Redbase/DataStream.pm
###############################################################################
# This method converts Java UTF-8 string into current encoding
###############################################################################
sub _readUTF($)
{
my $unicode_string = utf8(shift());
while ((my $pos = index($unicode_string, chr(192) . chr(128))) > -1)
{
$unicode_string = substr($unicode_string, 0, $pos) . chr(0) . substr($unicode_string, $pos + 2);
}
return $unicode_string->latin1();
}
###############################################################################
# This method reads binary char compatible with Java
###############################################################################
lib/DBD/SQLcipher.pm
unless ($flags & (DBD::SQLcipher::OPEN_READONLY() | DBD::SQLcipher::OPEN_READWRITE())) {
$attr->{sqlite_open_flags} |= DBD::SQLcipher::OPEN_READWRITE() | DBD::SQLcipher::OPEN_CREATE();
}
}
# To avoid unicode and long file name problems on Windows,
# convert to the shortname if the file (or parent directory) exists.
if ( $^O =~ /MSWin32/ and $real ne ':memory:' and $real ne '' and $real !~ /^file:/ and !-f $real ) {
require File::Basename;
my ($file, $dir, $suffix) = File::Basename::fileparse($real);
# We are creating a new file.
lib/DBD/SQLcipher.pm
=item sqlite_version
Returns the version of the SQLcipher library which B<DBD::SQLcipher> is using,
e.g., "2.8.0". Can only be read.
=item sqlite_unicode
If set to a true value, B<DBD::SQLcipher> will turn the UTF-8 flag on for all
text strings coming out of the database (this feature is currently disabled
for perl < 5.8.5). For more details on the UTF-8 flag see
L<perlunicode>. The default is for the UTF-8 flag to be turned off.
Also note that due to some bizarreness in SQLcipher's type system (see
L<http://www.sqlite.org/datatype3.html>), if you want to retain
blob-style behavior for B<some> columns under C<< $dbh->{sqlite_unicode} = 1
>> (say, to store images in the database), you have to state so
explicitly using the 3-argument form of L<DBI/bind_param> when doing
updates:
use DBI qw(:sql_types);
$dbh->{sqlite_unicode} = 1;
my $sth = $dbh->prepare("INSERT INTO mytable (blobcolumn) VALUES (?)");
# Binary_data will be stored as is.
$sth->bind_param(1, $binary_data, SQL_BLOB);
Defining the column type as C<BLOB> in the DDL is B<not> sufficient.
This attribute was originally named C<unicode> and was renamed to
C<sqlite_unicode> for integrity in version 1.26_06. The old C<unicode>
attribute is still accessible but will be deprecated in the near future.
=item sqlite_allow_multiple_statements
If you set this to true, C<do> method will process multiple
lib/DBD/SQLcipher.pm
SELECT * FROM foo ORDER BY name COLLATE perllocale
=head2 Unicode handling
If the attribute C<< $dbh->{sqlite_unicode} >> is set, strings coming from
the database and passed to the collation function will be properly
tagged with the utf8 flag; but this only works if the
C<sqlite_unicode> attribute is set B<before> the first call to
a perl collation sequence. The recommended way to activate unicode
is to set the parameter at connection time:
my $dbh = DBI->connect(
"dbi:SQLcipher:dbname=foo", "", "",
{
RaiseError => 1,
sqlite_unicode => 1,
}
);
=head2 Adding user-defined collations
lib/DBD/SQLeet.pm
unless ($flags & (DBD::SQLeet::OPEN_READONLY() | DBD::SQLeet::OPEN_READWRITE())) {
$attr->{sqlite_open_flags} |= DBD::SQLeet::OPEN_READWRITE() | DBD::SQLeet::OPEN_CREATE();
}
}
# To avoid unicode and long file name problems on Windows,
# convert to the shortname if the file (or parent directory) exists.
if ($^O =~ /MSWin32/ and $real ne ':memory:' and $real ne '' and $real !~ /^file:/ and !-f $real) {
require File::Basename;
my ($file, $dir, $suffix) = File::Basename::fileparse($real);
# We are creating a new file.
lib/DBD/SQLite.pm
=item sqlite_version
Returns the version of the SQLite library which DBD::SQLite is using,
e.g., "2.8.0". Can only be read.
=item unicode
If set to a true value, DBD::SQLite will turn the UTF-8 flag on for all text
strings coming out of the database. For more details on the UTF-8 flag see
L<perlunicode>. The default is for the UTF-8 flag to be turned off.
Also note that due to some bizarreness in SQLite's type system (see
http://www.sqlite.org/datatype3.html), if you want to retain
blob-style behavior for B<some> columns under C<< $dbh->{unicode} = 1
>> (say, to store images in the database), you have to state so
explicitly using the 3-argument form of L<DBI/bind_param> when doing
updates:
use DBI qw(:sql_types);
$dbh->{unicode} = 1;
my $sth = $dbh->prepare
("INSERT INTO mytable (blobcolumn) VALUES (?)");
$sth->bind_param(1, $binary_data, SQL_BLOB); # binary_data will
# be stored as-is.
lib/DBD/SQLite.pm
CREATE TABLE foo(txt1 COLLATE perl,
txt2 COLLATE perllocale,
txt3 COLLATE nocase)
If the attribute C<< $dbh->{unicode} >> is set, strings coming from
the database and passed to the collation function will be properly
tagged with the utf8 flag; but this only works if the
C<unicode> attribute is set B<before> the call to
C<create_collation>. The recommended way to activate unicode
is to set the parameter at connection time:
my $dbh = DBI->connect("dbi:SQLite:dbname=foo", "", "",
{ RaiseError => 1,
unicode => 1} );
=head2 $dbh->func( $n_opcodes, $handler, 'progress_handler' )
This method registers a handler to be invoked
lib/DBD/SQLite.pm
unless ($flags & (DBD::SQLite::OPEN_READONLY() | DBD::SQLite::OPEN_READWRITE())) {
$attr->{sqlite_open_flags} |= DBD::SQLite::OPEN_READWRITE() | DBD::SQLite::OPEN_CREATE();
}
}
# To avoid unicode and long file name problems on Windows,
# convert to the shortname if the file (or parent directory) exists.
if ( $^O =~ /MSWin32/ and $real ne ':memory:' and $real ne '' and $real !~ /^file:/ and !-f $real ) {
require File::Basename;
my ($file, $dir, $suffix) = File::Basename::fileparse($real);
# We are creating a new file.
lib/DBD/SQLite.pm
B<bad>, but it's been the default for many years, and changing that would
break existing applications.
=back
=item C<sqlite_unicode> or C<unicode> (deprecated)
If truthy, equivalent to setting C<sqlite_string_mode> to
DBD_SQLITE_STRING_MODE_UNICODE_NAIVE; if falsy, equivalent to
DBD_SQLITE_STRING_MODE_PV.
lib/DBD/SQLite.pm
Depending on the C<< $dbh->{sqlite_string_mode} >> value, strings coming
from the database and passed to the collation function may be decoded as
UTF-8. This only works, though, if the C<sqlite_string_mode> attribute is
set B<before> the first call to a perl collation sequence. The recommended
way to activate unicode is to set C<sqlite_string_mode> at connection time:
my $dbh = DBI->connect(
"dbi:SQLite:dbname=foo", "", "",
{
RaiseError => 1,
- Fixed major crash bug affecting Mac OS X
- Removed test.pl from distribution
- Upgraded to sqlite 2.7.6
0.23
- Fixed unicode tests
0.22
- Merge with sqlite 2.7.4
0.21
0.15
- Upgraded to SQLite 2.4.5
0.14
- Added NoUTF8Flag option, so that returned strings don't get flagged
with SvUTF8_on() - needed when you're storing non-unicode in the database
0.13
- Upgraded to SQLite 2.4.3
- Added script to download sqlite core library when it's upgraded
Const/Const.pm
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@DBD::Solid::Const::EXPORT = ();
# I added the three unicode types. --mms
%DBD::Solid::Const::EXPORT_TAGS =
(
sql_types => [ qw(SQL_CHAR
SQL_NUMERIC
SQL_DECIMAL