view release on metacpan or search on metacpan
lib/Data/Sofu.pm view on Meta::CPAN
$$self{CURRENT}=0;
$$self{References}=[];
$self->{Commentary}={};
%{$$self{Ref}}=();
my $guess=0;
unless (ref $file) {
$$self{CurFile}=$file;
open $fh,"<:raw",$$self{CurFile} or die "Sofu error open: $$self{CurFile} file: $!";
$guess=1;
binmode $fh;
#eval {require File::BOM;my ($e,$sp)=File::BOM::defuse($fh);$$self{Ret}.=$sp;$e=$e;};undef $@;
}
elsif (ref $file eq "SCALAR") {
$$self{CurFile}="Scalarref";
open $fh,"<:utf8",$file or die "Can't open perlIO: $!" if utf8::is_utf8($$file);
open $fh,"<",$file or die "Can't open perlIO: $!" if !utf8::is_utf8($$file);;
}
elsif (ref $file eq "GLOB") {
$$self{CurFile}="FileHandle";
$fh=$file;
}
lib/Data/Sofu.pm view on Meta::CPAN
}
return $tree;
}
}
if ($guess) {
my $enc=guess_encoding($text);
$text=$enc->decode($text) if ref $enc;
$text=Encode::decode("UTF-8",$text) unless ref $enc;
}
substr($text,0,1,"") if substr($text,0,1) eq chr(65279); # UTF-8 BOM (Why ain't it removed ?)
close $fh if ref $file;
$$self{CurFile}="";
my $u=$self->unpack($text);
$self->{OBJECT}=0;
return $u;
}
sub noComments {
my $self=shift;
$$self{PreserveCommentary}=0;
lib/Data/Sofu.pm view on Meta::CPAN
$$self{CURRENT}=0;
$$self{References}=[];
$self->{Commentary}={};
%{$$self{Ref}}=();
my $guess=0;
unless (ref $file) {
$$self{CurFile}=$file;
open $fh,$$self{CurFile} or die "Sofu error open: $$self{CurFile} file: $!";
$guess=1;
binmode $fh;
#eval {require File::BOM;my ($e,$sp)=File::BOM::defuse($fh);$$self{Ret}.=$sp;$e=$e;};undef $@;
}
elsif (ref $file eq "SCALAR") {
$$self{CurFile}="Scalarref";
open $fh,"<:utf8",$file or die "Can't open perlIO: $!" if utf8::is_utf8($$file);
open $fh,"<",$file or die "Can't open perlIO: $!" if !utf8::is_utf8($$file);
}
elsif (ref $file eq "GLOB") {
$$self{CurFile}="FileHandle";
$fh=$file;
}
lib/Data/Sofu.pm view on Meta::CPAN
}
}
if ($guess) {
my $enc=guess_encoding($text);
$text=$enc->decode($text) if ref $enc;
$text=Encode::decode("UTF-8",$text) unless ref $enc;
}
close $fh if ref $file;
$$self{CurFile}="";
substr($text,0,1,"") if substr($text,0,1) eq chr(65279); # UTF-8 BOM (Why ain't it removed ?)
my $u=$self->unpack($text);
#print Data::Dumper->Dump([$u]);
if (wantarray) {
return () unless $u;
return %{$u} if ref $u eq "HASH";
return (Value=>$u);
}
return unless $u;
return $u if ref $u eq "HASH";
return {Value=>$u};
lib/Data/Sofu.pm view on Meta::CPAN
=head1 NOTE on Unicode
Sofu files are normally written in a Unicode format. C<Data::Sofu> tries to guess which encoding to read (this usually works, thanks to Encode::Guess).
On the other hand, the output defaults to UTF-16 (UNIX) (like SofuD). If you need another encoding, you will have to prepare the filehandle yourself and pass it to the write() functions:
open my $fh,">:encoding(latin1)","out.sofu";
writeSofu($fh,$data);
Warning: UTF-32BE is not supported without a BOM (it looks too much like Binary).
Notes:
As for encodings under Windows: you should always have :raw as the first layer, but to make the files compatible with Windows programs you will have to use some special tricks:
open my $fh,">:raw:encoding(UTF-16):crlf:utf8","out.sofu" #Write Windows UTF-16 Files
open my $fh,">:raw:encoding(UTF-16)","out.sofu" #Write Unix UTF-16 Files
#Same goes for UTF32
#UTF-8: Don't use :utf8 or :raw:utf8 alone here,
lib/Data/Sofu.pm view on Meta::CPAN
open my $fh,">:raw:encoding(UTF-8)","out.sofu" #Unix style UTF-8
open my $fh,">:raw:encoding(UTF-8):crlf:utf8","out.sofu" #Windows style UTF-8
#And right after open():
print $fh chr(65279); #Print UTF-8 Byte Order Mark (Some programs want it, some programs die on it...)
One last thing:
open my $out,">:raw:encoding(UTF-16BE):crlf:utf8","out.sofu";
print $out chr(65279); #Byte Order Mark
#Now you can write out UTF-16 with BOM in Big Endian (even if your machine is Little Endian)
=head1 SEE ALSO
perl(1),L<http://sofu.sf.net>
For Sofud compatible Object Notation: L<Data::Sofu::Object>
For Sofu Binary: L<Data::Sofu::Binary>
lib/Data/Sofu/Binary.pm view on Meta::CPAN
First driver to support: C<Data::Sofu::Binary::Bin0200>
UTF 16 with byte order mark in EVERY string.
Byte order depends on your machine
=item C<"3"> or C<"UTF-16BE">
First driver to support: C<Data::Sofu::Binary::Bin0200>
No BOM, always BigEndian
=item C<"4"> or C<"UTF-16LE">
First driver to support: C<Data::Sofu::Binary::Bin0200>
No BOM, always LittleEndian
=item C<"5"> or C<"UTF-32">
First driver to support: C<Data::Sofu::Binary::Bin0200>
UTF-32 with byte order mark in EVERY string.
Byte order depends on your machine
=item C<"6"> or C<"UTF-32BE">
First driver to support: C<Data::Sofu::Binary::Bin0200>
No BOM, always BigEndian
=item C<"7"> or C<"UTF-32LE">
First driver to support: C<Data::Sofu::Binary::Bin0200>
No BOM, always LittleEndian
=item C<"8","9">
Reserved for future use
=item C<"10"> or C<"ascii">
First driver to support: C<Data::Sofu::Binary::Bin0200>
Normal ASCII encoding
lib/Data/Sofu/Binary.pm view on Meta::CPAN
=over
=item C<undef>
First driver to support: C<Data::Sofu::Binary::Bin0200>
Machine order
This is Default.
BOM is placed to detect the order used.
=item C<"LE">
First driver to support: C<Data::Sofu::Binary::Bin0200>
Little Endian
BOM is placed to detect the order used.
Use this to give it to machines which are using Little Endian and have to read the file a lot
=item C<"BE">
First driver to support: C<Data::Sofu::Binary::Bin0200>
Big Endian
BOM is placed to detect the order used.
Use this to give it to machines which are using Big Endian and have to read the file a lot
=item C<"7Bit">
First driver to support: C<Data::Sofu::Binary::Bin0200>
Use this byteorder if you can't trust your transport stream to be 8-bit safe.
Encoding is forced to be UTF-7. No byte in the file will be > 127.
BOM is set to 00 00.
=item C<"NOFORCE7Bit">
First driver to support: C<Data::Sofu::Binary::Bin0200>
Use this byteorder if you can't trust your transport stream to be 8-bit safe but you want an encoding other than UTF-7.
Encoding is NOT forced to be UTF-7.
BOM is set to 00 00.
=back
=item SOFUMARK
First driver to support: C<Data::Sofu::Binary::Bin0200>
Defines how often the string "Sofu" is placed in the file (to tell any user with a text-editor what type of file this one is).
=over
lib/Data/Sofu/Binary/Bin0200.pm view on Meta::CPAN
return $self->{Encoding}=$encoding[$self->{EncID}];
}
if ($encoding[int $id]) {
$self->{EncID}=$id;
return $self->{Encoding}=$encoding[$id];
}
$self->die("Unknown Encoding");
}
=head2 byteorder(BOM)
Internal method.
Switches the byteorder.
See pack() for more on byteorders.
=cut
lib/Data/Sofu/Binary/Bin0200.pm view on Meta::CPAN
#$self->encoding(1);
return 0;
}
$self->{SHORT}="S";
$self->{LONG}="L";
return 0;
}
=head2 bom(BOM)
Internal method.
Detects the byteorder.
See pack() for more on byteorders.
=cut
sub bom {
lib/Data/Sofu/Binary/Bin0200.pm view on Meta::CPAN
}
elsif ($type == 3) {
return $self->unpackHash($tree);
}
elsif ($type == 4) {
return $self->unpackRef($tree);
}
}
=head2 unpack(BOM)
Starts unpacking using BOM, gets encoding and the contents
=cut
sub unpack {
my $self=shift;
my $bom=shift;
$self->{COMMENTS}={};
$self->{References}=[];
$self->{Ref}={};
lib/Data/Sofu/Binary/Bin0200.pm view on Meta::CPAN
}
elsif ($type == 3) {
return $self->unpackMap2($tree);
}
elsif ($type == 4) {
return $self->unpackReference($tree);
}
}
=head2 unpackObject(BOM)
Starts unpacking into a Data::Sofu::Object structure using BOM, gets encoding and the contents
=cut
sub unpackObject {
my $self=shift;
my $bom=shift;
$self->{References}=[];
$self->{Ref}={};
$self->bom($bom);
lib/Data/Sofu/Binary/Bin0200.pm view on Meta::CPAN
7Bit encoding (if your transport stream isn't 8-Bit safe)
=item C<"2"> or C<"UTF-16">
UTF 16 with byte order mark in EVERY string.
Byte order depends on your machine
=item C<"3"> or C<"UTF-16BE">
No BOM, always BigEndian
=item C<"4"> or C<"UTF-16LE">
No BOM, always LittleEndian
=item C<"5"> or C<"UTF-32">
UTF-32 with byte order mark in EVERY string.
Byte order depends on your machine
=item C<"6"> or C<"UTF-32BE">
No BOM, always BigEndian
=item C<"7"> or C<"UTF-32LE">
No BOM, always LittleEndian
=item C<"8","9">
Reserved for future use
=item C<"10"> or C<"ascii">
Normal ASCII encoding
Might not support all characters and will warn about that.
lib/Data/Sofu/Binary/Bin0200.pm view on Meta::CPAN
Defines how the integers of the binary file are encoded.
=over
=item C<undef>
Machine order
This is Default.
BOM is placed to detect the order used.
=item C<"LE">
Little Endian
BOM is placed to detect the order used.
Use this to give it to machines which are using Little Endian and have to read the file a lot
=item C<"BE">
Big Endian
BOM is placed to detect the order used.
Use this to give it to machines which are using Big Endian and have to read the file a lot
=item C<"7Bit">
Use this byteorder if you can't trust your transport stream to be 8-bit safe.
Encoding is forced to be UTF-7. No byte in the file will be > 127.
BOM is set to 00 00.
=item C<"NOFORCE7Bit">
Use this byteorder if you can't trust your transport stream to be 8-bit safe but you want an encoding other than UTF-7.
Encoding is NOT forced to be UTF-7.
BOM is set to 00 00.
=back
=item SOFUMARK
Defines how often the string "Sofu" is placed in the file (to tell any user with a text-editor what type of file this one is).
=over
=item C<undef>
t/2_sofu_unicode.t view on Meta::CPAN
];
$VAR1->{Ruler}->[4]->{SubSub}->{Test}=$VAR1->{'Ruler'}[4];
#Preparations done!
#print $dumptext,"\n";
# UTF-8 round trips.
# Each case writes $VAR1 to test2.sofu through an explicit PerlIO layer stack
# and then checks that readSofu(), given only the file name, returns an equal
# structure.
# Every case reuses test2.sofu, so each open() is checked: if it failed
# silently, readSofu() would read the file left behind by the previous case
# and the comparison would pass without testing anything.

# Unix variant (no :crlf layer), no BOM.
open $fh,">:raw:encoding(UTF-8)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 8 Data (Unix, no BOM))");
#print Data::Dumper->Dump([scalar readSofu("test2.sofu")]);

# Unix variant, BOM (U+FEFF) printed by hand before the data.
open $fh,">:raw:encoding(UTF-8)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 8 Data (Unix, with BOM))");

# Windows variant (:crlf layer on top of the encoding), no BOM.
open $fh,">:raw:encoding(UTF-8):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 8 Data (Windows, no BOM))");

# Windows variant, BOM printed by hand.
open $fh,">:raw:encoding(UTF-8):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 8 Data (Windows, with BOM))");
# UTF-16 round trips.
# Each case writes $VAR1 to test2.sofu through an explicit PerlIO layer stack
# and then checks that readSofu(), given only the file name, returns an equal
# structure. "Unix" cases use the bare encoding layer; "Windows" cases add a
# :crlf layer on top of it.
# Every case reuses test2.sofu, so each open() is checked: if it failed
# silently, readSofu() would read the file left behind by the previous case
# and the comparison would pass without testing anything.

# Plain UTF-16: nothing is printed by hand here; the test labels call this
# "machineorder, auto BOM".
open $fh,">:raw:encoding(UTF-16)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, machineorder, auto BOM))");
open $fh,">:raw:encoding(UTF-16):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, machineorder, auto BOM))");

# UTF-16LE without a BOM.
open $fh,">:raw:encoding(UTF-16LE)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, little endian, no BOM))");
open $fh,">:raw:encoding(UTF-16LE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, little endian, no BOM))");

# UTF-16LE with the BOM (U+FEFF) printed by hand before the data.
open $fh,">:raw:encoding(UTF-16LE)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, little endian, forced BOM))");
open $fh,">:raw:encoding(UTF-16LE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, little endian, forced BOM))");

# UTF-16BE without a BOM.
open $fh,">:raw:encoding(UTF-16BE)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, big endian, no BOM))");
open $fh,">:raw:encoding(UTF-16BE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, big endian, no BOM))");

# UTF-16BE with the BOM printed by hand.
open $fh,">:raw:encoding(UTF-16BE)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, big endian, forced BOM))");
open $fh,">:raw:encoding(UTF-16BE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, big endian, forced BOM))");
# UTF-32 round trips.
# Each case writes $VAR1 to test2.sofu through an explicit PerlIO layer stack
# and then checks that readSofu(), given only the file name, returns an equal
# structure. "Unix" cases use the bare encoding layer; "Windows" cases add a
# :crlf layer on top of it.
# Every case reuses test2.sofu, so each open() is checked: if it failed
# silently, readSofu() would read the file left behind by the previous case
# and the comparison would pass without testing anything.

# Plain UTF-32: nothing is printed by hand here; the test labels call this
# "machineorder, auto BOM".
open $fh,">:raw:encoding(UTF-32)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, machineorder, auto BOM))");
open $fh,">:raw:encoding(UTF-32):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, machineorder, auto BOM))");

# UTF-32LE without a BOM.
open $fh,">:raw:encoding(UTF-32LE)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, little endian, no BOM))");
open $fh,">:raw:encoding(UTF-32LE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, little endian, no BOM))");

# UTF-32LE with the BOM (U+FEFF) printed by hand before the data.
open $fh,">:raw:encoding(UTF-32LE)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, little endian, forced BOM))");
open $fh,">:raw:encoding(UTF-32LE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, little endian, forced BOM))");

# These don't work for now: the Data::Sofu Unicode notes warn that UTF-32BE
# is not supported without a BOM (it looks too much like Binary), so the two
# BOM-less big endian cases stay disabled.
#open $fh,">:raw:encoding(UTF-32BE)","test2.sofu" or die "Can't open test2.sofu: $!";
#writeSofu($fh,$VAR1);
#close $fh;
#is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, big endian, no BOM))");
#open $fh,">:raw:encoding(UTF-32BE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
#writeSofu($fh,$VAR1);
#close $fh;
#is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, big endian, no BOM))");

# UTF-32BE with the BOM printed by hand.
open $fh,">:raw:encoding(UTF-32BE)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, big endian, forced BOM))");
open $fh,">:raw:encoding(UTF-32BE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, big endian, forced BOM))");
unlink "test.sofu";
unlink "test2.sofu";