Sofu

 view release on metacpan or  search on metacpan

lib/Data/Sofu.pm  view on Meta::CPAN

	$$self{CURRENT}=0;
	$$self{References}=[];
	$self->{Commentary}={};
	%{$$self{Ref}}=();
	my $guess=0;
	unless (ref $file) {
		$$self{CurFile}=$file;
		open $fh,"<:raw",$$self{CurFile} or die "Sofu error open: $$self{CurFile} file: $!";
		$guess=1;
		binmode $fh;
		#eval {require File::BOM;my ($e,$sp)=File::BOM::defuse($fh);$$self{Ret}.=$sp;$e=$e;};undef $@;
	}
	elsif (ref $file eq "SCALAR") {
		$$self{CurFile}="Scalarref";
		open $fh,"<:utf8",$file or die "Can't open perlIO: $!" if utf8::is_utf8($$file);
		open $fh,"<",$file or die "Can't open perlIO: $!"  if !utf8::is_utf8($$file);;
	}
	elsif (ref $file eq "GLOB") {
		$$self{CurFile}="FileHandle";
		$fh=$file;
	}

lib/Data/Sofu.pm  view on Meta::CPAN

			}
			return $tree;
		}

	}
	if ($guess)  {
		my $enc=guess_encoding($text);
		$text=$enc->decode($text) if ref $enc;
		$text=Encode::decode("UTF-8",$text) unless ref $enc;
	}
	substr($text,0,1,"") if substr($text,0,1) eq chr(65279); # UTF-8 BOM (Why ain't it removed ?)
	close $fh if ref $file;
	$$self{CurFile}="";
	my $u=$self->unpack($text);
	$self->{OBJECT}=0;
	return $u;
}

sub noComments {
	my $self=shift;
	$$self{PreserveCommentary}=0;

lib/Data/Sofu.pm  view on Meta::CPAN

	$$self{CURRENT}=0;
	$$self{References}=[];
	$self->{Commentary}={};
	%{$$self{Ref}}=();
	my $guess=0;
	unless (ref $file) {
		# A plain (non-reference) $file is treated as a path on disk.
		$$self{CurFile}=$file;
		# Three-argument open with an explicit read mode: with the two-argument
		# form a name such as ">foo" or "cmd |" would select the mode itself and
		# could truncate a file or start a command. ":raw" hands back the bytes
		# untouched so the encoding can be detected afterwards.
		open $fh,"<:raw",$$self{CurFile} or die "Sofu error open: $$self{CurFile} file: $!";
		# Mark the content as undecoded bytes; presumably consumed by the
		# `if ($guess)` decoding step further down (not fully visible here).
		$guess=1;
		binmode $fh;
		#eval {require File::BOM;my ($e,$sp)=File::BOM::defuse($fh);$$self{Ret}.=$sp;$e=$e;};undef $@;
	}
	elsif (ref $file eq "SCALAR") {
		$$self{CurFile}="Scalarref";
		open $fh,"<:utf8",$file or die "Can't open perlIO: $!" if utf8::is_utf8($$file);
		open $fh,"<",$file or die "Can't open perlIO: $!" if !utf8::is_utf8($$file);
	}
	elsif (ref $file eq "GLOB") {
		$$self{CurFile}="FileHandle";
		$fh=$file;
	}

lib/Data/Sofu.pm  view on Meta::CPAN

		}

	}
	if ($guess)  {
		my $enc=guess_encoding($text);
		$text=$enc->decode($text) if ref $enc;
		$text=Encode::decode("UTF-8",$text) unless ref $enc;
	}
	close $fh if ref $file;
	$$self{CurFile}="";
	substr($text,0,1,"") if substr($text,0,1) eq chr(65279); # UTF-8 BOM (Why ain't it removed ?)
	my $u=$self->unpack($text);
	#print Data::Dumper->Dump([$u]);
	if (wantarray) {
		return () unless $u;	
		return %{$u} if ref $u eq "HASH";
		return (Value=>$u);
	}
	return unless $u;
	return $u if ref $u eq "HASH";
	return {Value=>$u};

lib/Data/Sofu.pm  view on Meta::CPAN


=head1 NOTE on Unicode

Sofu files are normally written in a Unicode format. C<Data::Sofu> tries to guess which format to read (this usually works, thanks to Encode::Guess).

On the other hand, the output defaults to UTF-16 (UNIX) (like SofuD). If you need another encoding, you will have to prepare the filehandle yourself and pass it to the write() functions:

	open my $fh,">:encoding(latin1)","out.sofu";
	writeSofu($fh,$data);

Warning: UTF-32BE is not supported without a BOM (it looks too much like binary data).

Notes:

As for encodings under Windows: you should always have :raw as the first layer, but to make the files compatible with Windows programs you will have to use some special tricks:

	open my $fh,">:raw:encoding(UTF-16):crlf:utf8","out.sofu" #Write Windows UTF-16 Files
	open my $fh,">:raw:encoding(UTF-16)","out.sofu" #Write Unix UTF-16 Files
	#Same goes for UTF32
	
	#UTF-8: Don't use :utf8 or :raw:utf8 alone here, 

lib/Data/Sofu.pm  view on Meta::CPAN

	open my $fh,">:raw:encoding(UTF-8)","out.sofu" #Unix style UTF-8 
	open my $fh,">:raw:encoding(UTF-8):crlf:utf8","out.sofu" #Windows style UTF-8

	#And right after open():
	print $fh chr(65279); #Print UTF-8 Byte Order Mark (Some programs want it, some programs die on it...)
	
One last thing:

	open my $out,">:raw:encoding(UTF-16BE):crlf:utf8","out.sofu";
	print $out chr(65279); #Byte Order Mark
	#Now you can write out UTF-16 with BOM in big endian (even if your machine is little endian)


=head1 SEE ALSO

perl(1),L<http://sofu.sf.net>

For Sofud compatible Object Notation: L<Data::Sofu::Object>

For Sofu Binary: L<Data::Sofu::Binary>

lib/Data/Sofu/Binary.pm  view on Meta::CPAN

First driver to support: C<Data::Sofu::Binary::Bin0200>

UTF 16 with byte order mark in EVERY string.

Byte order depends on your machine

=item C<"3"> or C<"UTF-16BE">

First driver to support: C<Data::Sofu::Binary::Bin0200>

No BOM, always BigEndian

=item C<"4"> or C<"UTF-16LE">

First driver to support: C<Data::Sofu::Binary::Bin0200>

No BOM, always LittleEndian

=item C<"5"> or C<"UTF-32">

First driver to support: C<Data::Sofu::Binary::Bin0200>

UTF-32 with byte order mark in EVERY string.

Byte order depends on your machine

=item C<"6"> or C<"UTF-32BE">

First driver to support: C<Data::Sofu::Binary::Bin0200>

No BOM, always BigEndian

=item C<"7"> or C<"UTF-32LE">

First driver to support: C<Data::Sofu::Binary::Bin0200>

No BOM, always LittleEndian

=item C<"8","9">

Reserved for future use

=item C<"10"> or C<"ascii">

First driver to support: C<Data::Sofu::Binary::Bin0200>

Normal ASCII encoding

lib/Data/Sofu/Binary.pm  view on Meta::CPAN

=over

=item C<undef>

First driver to support: C<Data::Sofu::Binary::Bin0200>

Machine order

This is the default.

BOM is placed to detect the order used.

=item C<"LE">

First driver to support: C<Data::Sofu::Binary::Bin0200>

Little Endian

BOM is placed to detect the order used.

Use this for files that go to Little Endian machines which have to read the file a lot

=item C<"BE">

First driver to support: C<Data::Sofu::Binary::Bin0200>

Big Endian

BOM is placed to detect the order used.

Use this for files that go to Big Endian machines which have to read the file a lot

=item C<"7Bit">

First driver to support: C<Data::Sofu::Binary::Bin0200>

Use this byteorder if you can't trust your transport stream to be 8-bit safe.

Encoding is forced to be UTF-7. No byte in the file will be > 127.

BOM is set to 00 00.

=item C<"NOFORCE7Bit">

First driver to support: C<Data::Sofu::Binary::Bin0200>

Use this byteorder if you can't trust your transport stream to be 8-bit safe but you want an encoding other than UTF-7

Encoding is NOT forced to be UTF-7.

BOM is set to 00 00.

=back

=item SOFUMARK

First driver to support: C<Data::Sofu::Binary::Bin0200>

Defines how often the string "Sofu" is placed in the file (to tell any user with a text-editor what type of file this one is).

=over

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

		return $self->{Encoding}=$encoding[$self->{EncID}];
	}
	if ($encoding[int $id]) {
		$self->{EncID}=$id;
		return $self->{Encoding}=$encoding[$id];
	}
	$self->die("Unknown Encoding");
	
}

=head2 byteorder(BOM)

Internal method.

Switches the byteorder.

See pack() for more on byteorders.

=cut


lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

		#$self->encoding(1);
		return 0;
	}
	$self->{SHORT}="S";
	$self->{LONG}="L";
	return 0;

}


=head2 bom(BOM)

Internal method.

Detects the byteorder.

See pack() for more on byteorders.

=cut

sub bom {

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

	}
	elsif ($type == 3) {
		return $self->unpackHash($tree);
	}
	elsif ($type == 4) {
		return $self->unpackRef($tree);
	}
}


=head2 unpack(BOM)

Starts unpacking using BOM, gets encoding and the contents

=cut


sub unpack {
	my $self=shift;
	my $bom=shift;
	$self->{COMMENTS}={};
	$self->{References}=[];
	$self->{Ref}={};

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

	}
	elsif ($type == 3) {
		return $self->unpackMap2($tree);
	}
	elsif ($type == 4) {
		return $self->unpackReference($tree);
	}
}


=head2 unpackObject(BOM)

Starts unpacking into a Data::Sofu::Object structure using BOM, gets encoding and the contents

=cut


sub unpackObject {
	my $self=shift;
	my $bom=shift;
	$self->{References}=[];
	$self->{Ref}={};
	$self->bom($bom);

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

7Bit encoding (if your transport stream isn't 8-bit safe)

=item C<"2"> or C<"UTF-16">

UTF 16 with byte order mark in EVERY string.

Byte order depends on your machine

=item C<"3"> or C<"UTF-16BE">

No BOM, always BigEndian

=item C<"4"> or C<"UTF-16LE">

No BOM, always LittleEndian

=item C<"5"> or C<"UTF-32">

UTF-32 with byte order mark in EVERY string.

Byte order depends on your machine

=item C<"6"> or C<"UTF-32BE">

No BOM, always BigEndian

=item C<"7"> or C<"UTF-32LE">

No BOM, always LittleEndian

=item C<"8","9">

Reserved for future use

=item C<"10"> or C<"ascii">

Normal ASCII encoding

Might not support all characters and will warn about that.

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

Defines how the integers of the binary file are encoded.

=over

=item C<undef>

Machine order

This is the default.

BOM is placed to detect the order used.

=item C<"LE">

Little Endian

BOM is placed to detect the order used.

Use this for files that go to Little Endian machines which have to read the file a lot

=item C<"BE">

Big Endian

BOM is placed to detect the order used.

Use this for files that go to Big Endian machines which have to read the file a lot

=item C<"7Bit">

Use this byteorder if you can't trust your transport stream to be 8-bit safe.

Encoding is forced to be UTF-7. No byte in the file will be > 127.

BOM is set to 00 00.

=item C<"NOFORCE7Bit">

Use this byteorder if you can't trust your transport stream to be 8-bit safe but you want an encoding other than UTF-7

Encoding is NOT forced to be UTF-7.

BOM is set to 00 00.

=back

=item SOFUMARK

Defines how often the string "Sofu" is placed in the file (to tell any user with a text-editor what type of file this one is).

=over

=item C<undef>

t/2_sofu_unicode.t  view on Meta::CPAN

                                                ];
$VAR1->{Ruler}->[4]->{SubSub}->{Test}=$VAR1->{'Ruler'}[4];

#Preparations done!
#print $dumptext,"\n";

#UTF 8
# Each case rewrites test2.sofu through a different PerlIO layer stack and
# checks that readSofu() gives back $VAR1 unchanged. open() is checked because
# a failed open would leave the previous case's file on disk, and reading that
# stale file back would let the comparison pass for the wrong reason.

# Plain UTF-8, no byte order mark.
open $fh,">:raw:encoding(UTF-8)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 8 Data (Unix, no BOM))");
#print Data::Dumper->Dump([scalar readSofu("test2.sofu")]);

# Plain UTF-8 with U+FEFF written by hand before the data.
open $fh,">:raw:encoding(UTF-8)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 8 Data (Unix, with BOM))");

# Same two cases with the :crlf layer added (the "Windows" variants).
open $fh,">:raw:encoding(UTF-8):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 8 Data (Windows, no BOM))");

open $fh,">:raw:encoding(UTF-8):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 8 Data (Windows, with BOM))");


#UTF-16
# Round-trip checks for UTF-16 output: generic UTF-16, then explicit LE and BE,
# each with and without the :crlf layer and with and without a hand-written
# U+FEFF. open() is checked so that a failed open cannot leave the previous
# case's test2.sofu in place and produce a false pass.

# Generic UTF-16: no BOM is printed here (the test names call it "auto BOM").
open $fh,">:raw:encoding(UTF-16)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, machineorder, auto BOM))");

open $fh,">:raw:encoding(UTF-16):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, machineorder, auto BOM))");

# Little endian without a BOM: the reader has to detect the encoding itself.
open $fh,">:raw:encoding(UTF-16LE)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, little endian, no BOM))");

open $fh,">:raw:encoding(UTF-16LE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, little endian, no BOM))");

# Little endian with U+FEFF written by hand before the data.
open $fh,">:raw:encoding(UTF-16LE)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, little endian, forced BOM))");

open $fh,">:raw:encoding(UTF-16LE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, little endian, forced BOM))");

# Big endian without a BOM.
open $fh,">:raw:encoding(UTF-16BE)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, big endian, no BOM))");

open $fh,">:raw:encoding(UTF-16BE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, big endian, no BOM))");

# Big endian with U+FEFF written by hand before the data.
open $fh,">:raw:encoding(UTF-16BE)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Unix, big endian, forced BOM))");

open $fh,">:raw:encoding(UTF-16BE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 16 Data (Windows, big endian, forced BOM))");

#UTF-32
# Round-trip checks for UTF-32 output, mirroring the UTF-16 cases. open() is
# checked so that a failed open cannot leave the previous case's test2.sofu in
# place and produce a false pass.

# Generic UTF-32: no BOM is printed here (the test names call it "auto BOM").
open $fh,">:raw:encoding(UTF-32)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, machineorder, auto BOM))");

open $fh,">:raw:encoding(UTF-32):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, machineorder, auto BOM))");

# Little endian without a BOM.
open $fh,">:raw:encoding(UTF-32LE)","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, little endian, no BOM))");

open $fh,">:raw:encoding(UTF-32LE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, little endian, no BOM))");

# Little endian with U+FEFF written by hand before the data.
open $fh,">:raw:encoding(UTF-32LE)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, little endian, forced BOM))");

open $fh,">:raw:encoding(UTF-32LE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, little endian, forced BOM))");

# Big endian without a BOM is deliberately disabled: the cases are kept here,
# commented out, as a record of what does not work yet.
# These don't work for now:
#open $fh,">:raw:encoding(UTF-32BE)","test2.sofu";
#writeSofu($fh,$VAR1);
#close $fh;
#is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, big endian, no BOM))");

#open $fh,">:raw:encoding(UTF-32BE):crlf:utf8","test2.sofu";
#writeSofu($fh,$VAR1);
#close $fh;
#is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, big endian, no BOM))");

# Big endian with U+FEFF written by hand before the data.
open $fh,">:raw:encoding(UTF-32BE)","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Unix, big endian, forced BOM))");

open $fh,">:raw:encoding(UTF-32BE):crlf:utf8","test2.sofu" or die "Can't open test2.sofu: $!";
print $fh chr(65279); #BOM
writeSofu($fh,$VAR1);
close $fh;
is_deeply(scalar readSofu("test2.sofu"),$VAR1,"readSofu (UTF 32 Data (Windows, big endian, forced BOM))");

# Remove the scratch files (test.sofu is presumably created earlier in this
# script; only test2.sofu is written in the part visible here).
unlink "test.sofu";
unlink "test2.sofu";



( run in 0.882 second using v1.01-cache-2.11-cpan-131fc08a04b )