Sofu

 view release on metacpan or  search on metacpan

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

use base qw/Data::Sofu::Binary/;

#$SIG{__WARN__}=sub {	confess @_;};

=head1 METHODS

See also C<Data::Sofu::Binary> for public methods.

All these methods are INTERNAL, not for use outside of this module...

Except pack().

=head2 new()

Creates a new Binary Driver using DRIVER or the latest one available.

	require Data::Sofu::Binary;
	$bsofu = Data::Sofu::Binary->new("000_002_000_000"); Taking this driver;
	#You can call it directly:
	require Data::Sofu::Binary::Bin0200;
	$bsofu = Data::Sofu::Binary::Bin0200->new(); #The same

=cut 

sub new {
	my $class=shift;
	my $self={};
	bless $self,$class;
	$self->{OBJECT}=0;
	$self->{COMMENTS}=[];
	$self->{SUPPORTED}={"000_002_000_000"=>1};
	return $self;
}

=head2 encoding(ID) 

Switches and/or detetect the encoding.

See pack() for more on encodings.

=cut

sub encoding { #Switches the Encoding
	my $self=shift;
	my $id=shift;
	my @encoding = qw/UTF-8 UTF-7 UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE null null ascii cp1252 latin1 Latin9 Latin10/;
	my %encoding;
	@encoding{map {lc $_} @encoding} = (0 .. 12);
	if (exists $encoding{lc $id}) {
		$self->{EncID}=$encoding{lc $id};
		return $self->{Encoding}=$encoding[$self->{EncID}];
	}
	if ($encoding[int $id]) {
		$self->{EncID}=$id;
		return $self->{Encoding}=$encoding[$id];
	}
	$self->die("Unknown Encoding");
	
}

=head2 byteorder(BOM)

Internal method.

Switches the byteorder.

See pack() for more on byteorders.

=cut


sub byteorder {
	my $self=shift;
	my $bo=shift;
	if ($bo =~ m/le/i) { #little Endian
		$self->{SHORT}="v";
		$self->{LONG}="V";
		return 0;
	}
	if ($bo =~ m/be/i) { #BIG Endian
		$self->{SHORT}="n";
		$self->{LONG}="N";
		return 0;
	}
	if ($bo=~m/7/) { #7-Bit Mode
		$self->{SHORT}=undef;
		$self->{LONG}=undef;
		$self->encoding(1);
		return 1;
	}
	if ($bo=~m/Force/i) { #7-Bit Mode without UTF-7 encoding
		$self->{SHORT}=undef;
		$self->{LONG}=undef;
		#$self->encoding(1);
		return 0;
	}
	$self->{SHORT}="S";
	$self->{LONG}="L";
	return 0;

}


=head2 bom(BOM)

Internal method.

Detects the byteorder.

See pack() for more on byteorders.

=cut

sub bom {
	my $self=shift;
	my $bo=shift;
	if ($bo==1) { #Machine Order
		$self->{SHORT}="S";
		$self->{LONG}="L";
		return 0;
	}
	if ($bo==256) { #Wrong Order
		if (1 == CORE::unpack('S',pack('v',1))) {# We are little Endian
			$self->{SHORT}="n";
			$self->{LONG}="N";
		}
		else {
			$self->{SHORT}="v";
			$self->{LONG}="V";
		}
		return 0;
	}
	if ($bo==0) { #7-Bit Mode
		$self->{SHORT}=undef;
		$self->{LONG}=undef;
		$self->encoding(1);
		return 1;
	}
	$self->die("Unknown Byteorder: $bo, can't continue");
	return 0;

}

=head2 packShort(INT)

Packs one int-16 to binary using the set byteorder

=cut

sub packShort {
	my $self=shift;
	my $i=shift;
	$self->die("Short too large: $i") if $i > 65535;
	return pack $self->{SHORT},$i if $self->{SHORT};
	$self->die("Can't pack that Short in 7-Bit, too large: $i") if $i > 16383;
	return pack ("CC",($i&0x7F),($i&0x3F80));
}

=head2 packLong(INT)

Packs one int-32 to binary using the set byteorder

=cut

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

}


=head2 unpackArray(TREE)

Decodes an array, its comment and its content

=cut


sub unpackArray {
	my $self=shift;
	my $tree=shift;
	my @result=();
	$self->getComment($tree);
	my $len=$self->getLong($self->get(4));
	$self->die("Error while reading listlength, maybe EOF") unless defined $len;
	return {} if $len == 0;
	#die $len,"\n";
	$#result = $len-1; #Grow the Array :)
	for (my $i = 0;$i < $len;$i++) {
		my $type = $self->getType();
		$result[$i] = $self->unpackType($type,"$tree->$i");
		$self->{Ref}->{"$tree->$i"}=$result[$i];
		push @{$self->{References}},\$result[$i] if ($type == 4);
	}
	return \@result;
	
}


=head2 unpackType(TYPE,TREE)

Decodes a datastructure of TYPE.

=cut


sub unpackType {
	my $self=shift;
	my $type=shift;
	my $tree=shift;
	if ($type == 0) {
		return $self->unpackUndef($tree);
	}
	elsif ($type == 1) {
		return $self->unpackScalar($tree);
	}
	elsif ($type == 2) {
		return $self->unpackArray($tree);
	}
	elsif ($type == 3) {
		return $self->unpackHash($tree);
	}
	elsif ($type == 4) {
		return $self->unpackRef($tree);
	}
}


=head2 unpack(BOM)

Starts unpacking using BOM, gets encoding and the contents

=cut


sub unpack {
	my $self=shift;
	my $bom=shift;
	$self->{COMMENTS}={};
	$self->{References}=[];
	$self->{Ref}={};
	$self->bom($bom);
	my $encoding = $self->get(1);
	$self->die("No Encoding!") unless defined $encoding;
	$self->encoding(CORE::unpack("C",$encoding));
	my $tree="";
	my %result=();
	$self->getComment("=");
	while (defined (my $key = $self->getText())) {
		my $kkey = Data::Sofu::Sofukeyescape($key);
		my $type = $self->getType();
		$result{$key} = $self->unpackType($type,"$tree->$kkey");
		$self->{Ref}->{"$tree->$kkey"}=$result{$key};
		push @{$self->{References}},\$result{$key} if ($type == 4);
	}
	$self->{Ref}->{"="}=\%result;
	$self->postprocess(); #Setting References right.
	return (\%result,$self->{COMMENTS});
	
}


=head2 unpackUndefined(TREE)

Unpacks a Data::Sofu::Undefined and its comment.

=cut

sub unpackUndefined {
	my $self=shift;
	my $tree=shift;
	my $und = Data::Sofu::Undefined->new();
	$self->getComment($und);
	return $und;

}


=head2 unpackValue(TREE)

Unpacks a Data::Sofu::Value, its content and its comment.

=cut

sub unpackValue {
	my $self=shift;
	my $tree=shift;
	my $value = Data::Sofu::Value->new("");
	$self->getComment($value);
	$value->set($self->getText());
	return $value;

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN


=head2 unpackList2(TREE)

Unpacks a Data::Sofu::List, its content and its comment.

(Speed optimized, but uses dirty tricks)

=cut


sub unpackList2 { #faster version, using the perlinterface
	my $self=shift;
	my $tree=shift;
	my $list=Data::Sofu::List->new();
	$self->getComment($list);
	my @result;
	my $len=$self->getLong($self->get(4));
	$self->die("Error while reading listlength, maybe EOF") unless defined $len;
	return $list if $len == 0;
	#die $len,"\n";
	$#result = $len-1; #Grow the Array :)
	for (my $i = 0;$i < $len;$i++) {
		my $type = $self->getType();
		$result[$i] = $self->unpackObjectType($type,"$tree->$i");
		$self->{Ref}->{"$tree->$i"}=$result[$i];
		push @{$self->{References}},$result[$i] if ($type == 4);
	}
	$list->{List}=\@result;
	return $list;
	
}

=head2 unpackObjectType(TYPE,TREE)

Unpacks a datastructure defined by TYPE

=cut

sub unpackObjectType {
	my $self=shift;
	my $type=shift;
	my $tree=shift;
	if ($type == 0) {
		return $self->unpackUndefined($tree);
	}
	elsif ($type == 1) {
		return $self->unpackValue($tree);
	}
	elsif ($type == 2) {
		return $self->unpackList2($tree);
	}
	elsif ($type == 3) {
		return $self->unpackMap2($tree);
	}
	elsif ($type == 4) {
		return $self->unpackReference($tree);
	}
}


=head2 unpackObject(BOM)

Starts unpacking into a Data::Sofu::Object structure using BOM, gets encoding and the contents

=cut


sub unpackObject {
	my $self=shift;
	my $bom=shift;
	$self->{References}=[];
	$self->{Ref}={};
	$self->bom($bom);
	my $encoding = $self->get(1);
	$self->die("No Encoding!") unless defined $encoding;
	$self->encoding(CORE::unpack("C",$encoding));
	my $tree="";
	my $map = Data::Sofu::Map->new();
	$self->getComment($map);
	while (defined (my $key = $self->getText())) {
		my $kkey = Data::Sofu::Sofukeyescape($key);
		my $type = $self->getType();
		my $res = $self->unpackObjectType($type,"$tree->$kkey");
		$self->{Ref}->{"$tree->$kkey"}=$res;
		push @{$self->{References}},$res if ($type == 4);
		$map->setAttribute($key,$res);

	}
	$self->{Ref}->{"="}=$map;
	$self->objectprocess(); #Setting References right.
	return $map;
	
}


=head2 packType(TYPE) 

Encodes Type information and returns it.

=cut

sub packType {
	my $self=shift;
	my $type=shift;
	my $str="";
	if ($self->{Mark}) {
		$str="Sofu" if rand() < $self->{Mark};
	}
	return $str.pack("C",$type);
}

=head2 packText(STRING)

Encodes a STRING using Encoding and returns it.

=cut 

sub packText {
	my $self=shift;
	my $text=shift;
	return $self->packLong(0) if not defined $text or $text eq "";
	$text = Encode::encode($self->{Encoding},$text,Encode::FB_CROAK);
	return $self->packLong(length($text)).$text;

lib/Data/Sofu/Binary/Bin0200.pm  view on Meta::CPAN

sub packHash {
	my $self=shift;
	my $data=shift;
	my $tree=shift;
	my $str=$self->packLong(scalar keys %{$data});
	foreach my $key (keys %{$data}) {
		my $kkey = Data::Sofu::Sofukeyescape($key);
		$str.=$self->packText($key);
		$str.=$self->packData($data->{$key},"$tree->$kkey");
	}
	return $str;
}

=head2 pack(TREE,[COMMENTS,[ENCODING,[BYTEORDER,[SOFUMARK]]]])

Packs a structure (TREE) into a string using the Sofu binary file format.

Returns a string representing TREE.

=over

=item TREE

Perl datastructure to pack. Can be a hash, array or scalar (or array of hashes of hashes of arrays or whatever). Anything NOT a hash will be converted to TREE={Value=>TREE};

It can also be a Data::Sofu::Object or derived (Data::Sofu::Map, Data::Sofu::List, Data::Sofu::Value, Data::Sofu::...).
Anything not a Data::Sofu::Map will be converted to one (A Map with one attribute called "Value" that holds TREE).

=item COMMENTS

Comment hash (as returned by Data::Sofu::getSofucomments() or Data::Sofu->new()->comments() after any file was read).

Can be undef or {}.

=item ENCODING

Specifies the encoding of the strings in the binary sofu file, which can be: 

=over

=item C<"0"> or C<"UTF-8">

This is default.

Normal UTF-8 encoding (supports almost all chars)

=item C<"1"> or C<"UTF-7">

This is default for byteorder = 7Bit (See below)

7Bit encoding (if your transport stream isn't 8-Bit safe

=item C<"2"> or C<"UTF-16">

UTF 16 with byte order mark in EVERY string.

Byteoder depends on your machine

=item C<"3"> or C<"UTF-16BE">

No BOM, always BigEndian

=item C<"4"> or C<"UTF-16LE">

No BOM, always LittleEndian

=item C<"5"> or C<"UTF-32">

UTF-32 with byte order mark in EVERY string.

Byteoder depends on your machine

=item C<"6"> or C<"UTF-32BE">

No BOM, always BigEndian

=item C<"7"> or C<"UTF-32LE">

No BOM, always LittleEndian

=item C<"8","9">

Reserved for future use

=item C<"10"> or C<"ascii">

Normal ASCII encoding

Might not support all characters and will warn about that.

=item C<"11"> or C<"cp1252">

Windows Codepage 1252 

Might not support all characters and will warn about that.

=item C<"12"> or C<"latin1">

ISO Latin 1 

Might not support all characters and will warn about that.

=item C<"13"> or C<"latin9">

ISO Latin 9

Might not support all characters and will warn about that.

=item C<"14"> or C<"latin10">

ISO Latin 10

Might not support all characters and will warn about that.

=back

=item BYTEORDER

Defines how the integers of the binary file are encoded.

=over

=item C<undef>

Maschine order

This is Default. 

BOM is placed to detect the order used.

=item C<"LE">

Little Endian

BOM is placed to detect the order used.

Use this to give it to machines which are using Little Endian and have to read the file alot

=item C<"BE">

Big Endian

BOM is placed to detect the order used.

Use this to give it to machines which are using Big Endian and have to read the file alot

=item C<"7Bit">

Use this byteorder if you can't trust your transport stream to be 8-Bit save.

Encoding is forced to be UTF-7. No byte in the file will be > 127.

BOM is set to 00 00.

=item C<"NOFORCE7Bit">

Use this byteorder if you can't trust your transport stream to be 8-Bit save but you want another enconding than UTF-7

Encoding is NOT forced to be UTF-7.

BOM is set to 00 00.

=back

=item SOFUMARK

Defines how often the string "Sofu" is placed in the file (to tell any user with a text-editor what type of file this one is).

=over

=item C<undef>

Only place one "Sofu" at the beginning of the file.

This is default.

=item C<"0" or "">

Place no string anywhere.

=item C<< "1" or >1 >>

Place a string on every place it is possible 

Warning, the file might get big.

=item C<"0.000001" - "0.99999">

Place strings randomly.

=back

=back

B<NOTE:>

Encoding, Byteorder and encoding driver (and Sofumark of course) are saved in the binary file. So you don't need to specify them for reading files, in fact just give them the Data::Sofu's readSofu() and all will be fine.

=cut

sub pack { #Built tree into b-stream
	my $self=shift;
	$self->{OFFSET}="while packing";
	$self->{SEEN}={};
	my $data=shift;
	my $r = ref $data;
	return $self->packObject($data,@_) if $r and $r =~ m/Data::Sofu::/ and $data->isa("Data::Sofu::Object"); 
	$data = {Value=>$data} unless ref $data and ref $data eq "HASH";
	#$self->die("Data format wrong, must be hashref") unless (ref $data and ref $data eq "HASH");
	$self->{SEEN}->{$data}="->";
	my $comments=shift;
	$comments = {} unless defined $comments;
	$self->die("Comment format wrong, must be hashref") unless (ref $comments and ref $comments eq "HASH");
	$self->{Comments}=$comments;
	my $tree;
	#my $encoding=shift;
	#my $byteorder=shift;
	#$encoding=0 unless $encoding;
	#$byteorder=0 unless $byteorder;
	#$self->encoding($encoding) unless $self->byteorder($byteorder);
	#my $mark=shift;



( run in 2.728 seconds using v1.01-cache-2.11-cpan-5735350b133 )