view release on metacpan or search on metacpan
0.006 2020-03-08
- script handle BOM as Text::CSV doesn't
- script handle no pk_insert
- chop_cols not try splice past end of array
0.005 2020-02-01
- work around Text::CSV not checking Text::CSV_XS version like it should
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Data/Table/Text.pm view on Meta::CPAN
processSizesInParallel
quoteFile
randomizeArray
readBinaryFile readFile readFileFromRemote readFiles readGZipFile readStdIn readUtf16File
rectangularArray rectangularArray2
relFromAbsAgainstAbs reloadHashes removeBOM removeDuplicatePrefixes
removeFilePathsFromStructure removeFilePrefix removeFoldersFromDataStructure
replaceStringWithString reportAttributeSettings reportAttributes
reportExportableMethods reportReplacableMethods reportSettings retrieveFile
runInParallel runInSquareRootParallel
s3DownloadFolder s3FileExists s3ListFilesAndSizes s3ReadFile s3ReadString
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Data/TableReader/Decoder/HTML.pm view on Meta::CPAN
return 1;
}
# Puts the decoder's file handle into binary mode and passes it to
# _parse_html_tables, returning whatever that call returns.
# NOTE(review): presumably the lazy builder for a "_tables" attribute -- confirm
# against the attribute declaration, which is not visible here.
sub _build__tables {
    my ($self) = @_;
    my $fh = $self->file_handle;

    # TODO: determine encoding from BOM, or from meta-equiv while parsing...
    # Until then the handle is read as raw bytes.
    binmode $fh;

    return $self->_parse_html_tables($fh);
}
sub _parse_html_tables {
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Data/TableReader/Decoder/CSV.pm view on Meta::CPAN
return 'UTF-8' if $enc eq 'utf8';
return uc($1) if $enc =~ /encoding\(([^)]+)\)/;
return uc($enc); # could throw a parse error, but this is probably more useful behavior
}
# fh_start_pos will be set if we have already checked for BOM
if ($self->autodetect_encoding && !defined $self->_fh_start_pos) {
$self->_fh_start_pos(tell $fh or 0);
if (($enc= $self->_autodetect_bom($fh))) {
binmode($fh, ":encoding($enc)");
# re-mark the start after the BOM
$self->_fh_start_pos(tell $fh or 0);
return $enc;
}
}
return '';
lib/Data/TableReader/Decoder/CSV.pm view on Meta::CPAN
my $fh= $self->file_handle;
my $row_ref= $self->_row_ref;
# Keeping this object is just an indication of whether an iterator has been used yet
if (!$row_ref) {
$self->_row_ref($row_ref= \(my $row= 0));
# trigger BOM detection if needed
my $enc= $self->encoding;
$self->_log->('debug', "encoding is ".($enc||'maybe utf8'));
# ensure _fh_start_pos is set
$self->_fh_start_pos(tell $fh or 0);
}
lib/Data/TableReader/Decoder/CSV.pm view on Meta::CPAN
);
$self->_iterator($i);
return $i;
}
# This design is simplified from File::BOM in that it ignores UTF-32
# and in any "normal" case it can read from a pipe with only one
# character to push back, avoiding the need to tie the file handle.
# It also checks for whether layers have already been enabled.
# It also avoids seeking to the start of the file handle, in case
# the user deliberately seeked to a position.
lib/Data/TableReader/Decoder/CSV.pm view on Meta::CPAN
return 'UTF-8';
}
}
}
# It wasn't a BOM. Try to undo our read.
$self->_log->('debug', 'No BOM in stream, seeking back to start');
if (length $buf == 1) {
$fh->ungetc(ord $buf);
} elsif (!seek($fh, $fpos, 0)) {
# Can't seek
if ($fh->can('ungets')) { # support for FileHandle::Unget
$fh->ungets($buf);
} else {
croak "Can't seek input handle after BOM detection; You should set an encoding manually, buffer the entire input, or use FileHandle::Unget";
}
}
return;
}
lib/Data/TableReader/Decoder/CSV.pm view on Meta::CPAN
If the stream has a PerlIO encoding() on it, no additional decoding is done.
=item *
If the stream has a BOM (byte-order mark) for UTF-8 or UTF-16, it adds that
encoding with C<binmode>.
=item *
Else, it lets the parser decide. The default Text::CSV parser will
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
BOL_t8_p8|5.033003||Viu
BOL_t8_pb|5.033003||Viu
BOL_tb|5.035004||Viu
BOL_tb_p8|5.033003||Viu
BOL_tb_pb|5.033003||Viu
BOM_UTF8|5.025005|5.003007|p
BOM_UTF8_FIRST_BYTE|5.019004||Viu
BOM_UTF8_TAIL|5.019004||Viu
boolSV|5.004000|5.003007|p
boot_core_builtin|5.035007||Viu
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
#endif
#endif
#if 'A' == 65
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
#endif
#elif '^' == 95
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
view all matches for this distribution
view release on metacpan or search on metacpan
---
{WABlGg=>{WsxTgoCmJtWj=>79121,FWbXcNs=>'zY',HAZOai__Oeuny=>['uLkhvnHL'],yrOEvRUhB=>'leTRGn_ljQd',TMQr_DaY=>'Ex-Gn',ORpx=>'eYFA',leIuEzyBOpzvIJ=>0,OEXaHoI=>'',MJQeEJFLQxA=>0,_RuafJI=>'0.5'},LumhOl=>{JuoIjb=>'*/*',opsmlZOiaOUc=>0,JTQ=>'1',pHlNZJCtCAR=>...
---
{WABlGg=>{MoXkWVwtWtBwkq=>'ubUUbR;kdvxPt;bBcvCG;qncsPtdjUV;bIvhPoNcBr;NHVRfS'},LumhOl=>{JuoIjb=>'*/*',EfrbvjmcgPegFyg=>'eVyK, RBriKYc',LTDrnMNCqjHMKxM=>'Ex-or',WbGVflxHyB=>88,opsmlZOiaOUc=>2,HYpBCoBNdaqv=>'XINP/WQe',JTQ=>'1',pHlNZJCtCAR=>1,qpjfZFh=>'...
---
{WABlGg=>{MoXkWVwtWtBwkq=>'_aac;kdvxPt;bBcvCG;NHVRfS;ubUUbR;ugA;dKuoNX'},LumhOl=>{JuoIjb=>'XINP/WQe,*/*;x=0.0',EfrbvjmcgPegFyg=>'eVyK, RBriKYc',LTDrnMNCqjHMKxM=>'Ex-or,Ex;x=0.1',vjOR_XPbT=>'ifIYLG',WbGVflxHyB=>256,opsmlZOiaOUc=>9,HYpBCoBNdaqv=>'XINP/...
---
{WABlGg=>{WsxTgoCmJtWj=>79121,uJBWfGbhQli=>597,MoXkWVwtWtBwkq=>'y;_aac;ugA;dKuoNX;kdvxPt;NHVRfS;ubUUbR;bBcvCG',ShH=>{WABlGg=>'eQKFL'},mEJfnAhcLEvXOonkXyHwD=>597,HAZOai__Oeuny=>['uLkhvnHL','MpouB_KbUxvwcEubzzYLHgC'],ruZkzMnxAI=>0,yrOEvRUhB=>'XXKnDFeKb...
---
{WABlGg=>{MoXkWVwtWtBwkq=>'ugA;dKuoNX;kdvxPt;NHVRfS;ubUUbR;bBcvCG;qYxCdCMESXtRY;YG;_aac',TRjxvaPmnUCCiBTJqV=>{rtiJSiR=>'349-1-5',mflaxtxp=>'349-1-1',GboTnhYH=>'72668',TMQr_DaY=>'DT'},ORpx=>'TRjxvaPmnUCCiBTJqV',rMBPIcYzSQM=>'46684332Lve5'},gQ=>{svnVKH...
---
---
{WABlGg=>{EVWCeOuOkCRHK=>0,MoXkWVwtWtBwkq=>'_aac;y',hLFbBmKcaW=>{'270'=>[0,123637068,0],'3634'=>[0,123637068,0],'4996'=>[0,123637068,0],'959'=>[0,123637068,0],'1960'=>[0,123637068,0],'7507'=>[0,123637068,0],'1208'=>[0,123637068,0],'483'=>[0,123637068...
---
{WABlGg=>{MoXkWVwtWtBwkq=>'_aac;kdvxPt;NHVRfS;ubUUbR;bBcvCG;ugA;dKuoNX;YG',TRjxvaPmnUCCiBTJqV=>{rtiJSiR=>'349-3-1',mflaxtxp=>'349-3-8',GboTnhYH=>'110414280',TMQr_DaY=>'Ex-Gn'},ORpx=>'TRjxvaPmnUCCiBTJqV',rMBPIcYzSQM=>'30007Xm1o24437d'},gQ=>{svnVKHykek...
---
{WABlGg=>{WsxTgoCmJtWj=>195241,uJBWfGbhQli=>1024,EVWCeOuOkCRHK=>0,MoXkWVwtWtBwkq=>'kdvxPt;bBcvCG;_aac',FWbXcNs=>'CF',fnItfpriiXyKjY=>['3563','1365','4977','4996','172','483','1415','1392','7507','203','1208','3649398','931'],hLFbBmKcaW=>{'270'=>['0',...
---
{WABlGg=>{WsxTgoCmJtWj=>79121,freSfeVzWWZa=>{'6565629K3169930K0K58857'=>['39.112536',0],'6565629K3169930K0K85257'=>['8.919827',0],'13590196K3169930K0K10057'=>['39.112536',0],'13590196K3169930K0K93957'=>['8.919827',0],'5233038K3169930K0K58057'=>['39.1...
---
{WABlGg=>{MoXkWVwtWtBwkq=>'_aac;kdvxPt;bBcvCG;ksrFbf',DZQUXUpvxPbU=>0,ORpx=>'LEAt'},LumhOl=>{JuoIjb=>'*/*',EfrbvjmcgPegFyg=>'eVyK, RBriKYc',LTDrnMNCqjHMKxM=>'aT',WbGVflxHyB=>13,opsmlZOiaOUc=>3,HYpBCoBNdaqv=>'KqnhC/uUU',JTQ=>'1',pHlNZJCtCAR=>1,qpjfZFh...
---
view all matches for this distribution
view release on metacpan or search on metacpan
share/sanctions.yml view on Meta::CPAN
residence:
- bi
- dob_epoch:
- 63072000
names:
- Ignace SIBOMANA
place_of_birth:
- bi
- citizen:
- co
- mx
view all matches for this distribution
view release on metacpan or search on metacpan
dev-bin/convert-from-csv.pl view on Meta::CPAN
my $csv = Text::CSV->new ({ binary => 1 });
open my $fh, '<:utf8', $ARGV[0] or die $!;
$_ = <$fh>; # throw away the header line with BOM.
while ( my $row = $csv->getline($fh) ) {
# 西元日期,星期,是否放假,備註 (date, weekday, is-holiday flag, remarks)
my ($date, $weekday, $is_holiday,$description) = @$row;
my ($year, $month, $day) = $date =~ m{^(....)(..)(..)$};
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error( "Did not provide a string to load" );
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error( "Stream has a non UTF-8 BOM" );
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
BOL_t8_p8|5.033003||Viu
BOL_t8_pb|5.033003||Viu
BOL_tb|5.035004||Viu
BOL_tb_p8|5.033003||Viu
BOL_tb_pb|5.033003||Viu
BOM_UTF8|5.025005|5.003007|p
BOM_UTF8_FIRST_BYTE|5.019004||Viu
BOM_UTF8_TAIL|5.019004||Viu
boolSV|5.004000|5.003007|p
boot_core_builtin|5.035007||Viu
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
#endif
#endif
#if 'A' == 65
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
#endif
#elif '^' == 95
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error( "Did not provide a string to load" );
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error( "Stream has a non UTF-8 BOM" );
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
}
else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
res/typenumbers.xml view on Meta::CPAN
<type number="42" name="CREATOR" />
<type number="43" name="SKILL" />
<type number="44" name="EXPERIENCE" />
<type number="45" name="EARTHWALL" />
<type number="46" name="GOLEM" />
<!--<type number="47" name="BOMB" />-->
<type number="48" name="THROWN_OBJ" />
<type number="49" name="BLINDNESS" />
<type number="50" name="GOD" />
<type number="51" name="DETECTOR" />
<type number="52" name="TRIGGER_MARKER" />
view all matches for this distribution
view release on metacpan or search on metacpan
bind_match|5.003007||Viu
block_end|5.004000|5.004000|
block_gimme|5.004000|5.004000|u
blockhook_register|5.013003|5.013003|x
block_start|5.004000|5.004000|
BOM_UTF8|5.025005|5.003007|p
boolSV|5.004000|5.003007|p
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
_byte_dump_string|5.025006||cViu
#endif
#endif
#if 'A' == 65
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
#endif
#elif '^' == 95
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
view all matches for this distribution
view release on metacpan or search on metacpan
bind_match|5.003007||Viu
block_end|5.004000|5.004000|
block_gimme|5.004000|5.004000|u
blockhook_register|5.013003|5.013003|x
block_start|5.004000|5.004000|
BOM_UTF8|5.025005|5.003007|p
boolSV|5.004000|5.003007|p
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
_byte_dump_string|5.025006||cViu
#endif
#endif
#if 'A' == 65
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
#endif
#elif '^' == 95
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
view all matches for this distribution
view release on metacpan or search on metacpan
bin/perlbrewise-spec view on Meta::CPAN
encode_stringify, allow_bignum, allow_singlequote, sort_by
(partially), escape_slash, convert_blessed, ... optional
decode_json(, allow_nonref) arg.
relaxed implements allow_dupkeys.
- supports all 5 Unicode BOMs: UTF-8, UTF-16LE, UTF-16BE, UTF-32LE,
UTF-32BE, encoding internally to UTF-8.
=cut
our @ISA = qw(Exporter);
bin/perlbrewise-spec view on Meta::CPAN
This is a complex example, but most of the complexity comes from the fact
that we are trying to be correct (bear with me if I am wrong, I never ran
the above example :).
=head1 BOM
Detects all Unicode B<Byte Order Marks> on decode:
UTF-8, UTF-16LE, UTF-16BE, UTF-32LE and UTF-32BE.
B<Warning>: With perls older than 5.20 you need to load the Encode module
before loading a multibyte BOM, i.e. >= UTF-16. Otherwise an error is
thrown. This is an implementation limitation and might get fixed later.
See L<https://tools.ietf.org/html/rfc7159#section-8.1>
I<"JSON text SHALL be encoded in UTF-8, UTF-16, or UTF-32.">
I<"Implementations MUST NOT add a byte order mark to the beginning of a
JSON text", "implementations (...) MAY ignore the presence of a byte
order mark rather than treating it as an error".>
See also L<http://www.unicode.org/faq/utf_bom.html#BOM>.
Beware that Cpanel::JSON::XS is currently the only JSON module which
does accept and decode a BOM.
=head1 MAPPING
This section describes how Cpanel::JSON::XS maps Perl values to JSON
values and vice versa. These mappings are designed to "do the right
view all matches for this distribution
view release on metacpan or search on metacpan
BOL_t8_p8|5.033003||Viu
BOL_t8_pb|5.033003||Viu
BOL_tb|5.035004||Viu
BOL_tb_p8|5.033003||Viu
BOL_tb_pb|5.033003||Viu
BOM_UTF8|5.025005|5.003007|p
BOM_UTF8_FIRST_BYTE|5.019004||Viu
BOM_UTF8_TAIL|5.019004||Viu
boolSV|5.004000|5.003007|p
boot_core_builtin|5.035007||Viu
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
#endif
#endif
#if 'A' == 65
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
#endif
#elif '^' == 95
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
view all matches for this distribution
view release on metacpan or search on metacpan
aTHXR_
av_count
AvFILLp
av_tindex
av_top_index
BOM_UTF8
boolSV
call_argv
caller_cx
call_method
call_pv
av_top_index
av_undef
av_unshift
ax
BIN
BOM_UTF8
boolSV
BYTEORDER
call_argv
call_method
call_pv
BOL_t8_p8|5.033003||Viu
BOL_t8_pb|5.033003||Viu
BOL_tb|5.035004||Viu
BOL_tb_p8|5.033003||Viu
BOL_tb_pb|5.033003||Viu
BOM_UTF8|5.025005|5.003007|p
BOM_UTF8_FIRST_BYTE|5.019004||Viu
BOM_UTF8_TAIL|5.019004||Viu
boolSV|5.004000|5.003007|p
boot_core_builtin|5.035007||Viu
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
#endif
#endif
#if 'A' == 65
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
#endif
#elif '^' == 95
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
view all matches for this distribution
view release on metacpan or search on metacpan
(index($4, 'n') >= 0 ? ( nothxarg => 1 ) : ()),
} )
: die "invalid spec: $_" } qw(
AvFILLp|5.004050||p
AvFILL|||
BOM_UTF8|||
BhkDISABLE||5.024000|
BhkENABLE||5.024000|
BhkENTRY_set||5.024000|
BhkENTRY|||
BhkFLAGS|||
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
t/000-report-versions.t view on Meta::CPAN
return $self->_error("Did not provide a string to load");
}
# Byte order marks
# NOTE: Keeping this here to educate maintainers
# my %BOM = (
# "\357\273\277" => 'UTF-8',
# "\376\377" => 'UTF-16BE',
# "\377\376" => 'UTF-16LE',
# "\377\376\0\0" => 'UTF-32LE',
# "\0\0\376\377" => 'UTF-32BE',
# );
if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
return $self->_error("Stream has a non UTF-8 BOM");
} else {
# Strip UTF-8 bom if found, we'll just ignore it
$string =~ s/^\357\273\277//;
}
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Devel/ppport.h view on Meta::CPAN
BOL_t8_p8|5.033003||Viu
BOL_t8_pb|5.033003||Viu
BOL_tb|5.035004||Viu
BOL_tb_p8|5.033003||Viu
BOL_tb_pb|5.033003||Viu
BOM_UTF8|5.025005|5.003007|p
BOM_UTF8_FIRST_BYTE|5.019004||Viu
BOM_UTF8_TAIL|5.019004||Viu
boolSV|5.004000|5.003007|p
boot_core_builtin|5.035007||Viu
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
lib/Devel/ppport.h view on Meta::CPAN
#endif
#endif
#if 'A' == 65
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
#endif
#elif '^' == 95
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
view all matches for this distribution
view release on metacpan or search on metacpan
bind_match|5.003007||Viu
block_end|5.004000|5.004000|
block_gimme|5.004000|5.004000|u
blockhook_register|5.013003|5.013003|x
block_start|5.004000|5.004000|
BOM_UTF8|5.025005|5.003007|p
boolSV|5.004000|5.003007|p
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
_byte_dump_string|5.025006||cViu
#endif
#endif
#if 'A' == 65
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
#endif
#elif '^' == 95
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
view all matches for this distribution
view release on metacpan or search on metacpan
(index($4, 'n') >= 0 ? ( nothxarg => 1 ) : ()),
} )
: die "invalid spec: $_" } qw(
AvFILLp|5.004050||p
AvFILL|||
BOM_UTF8|||
BhkDISABLE||5.024000|
BhkENABLE||5.024000|
BhkENTRY_set||5.024000|
BhkENTRY|||
BhkFLAGS|||
view all matches for this distribution