ZMachine
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/ZMachine/ZSCII.pm view on Meta::CPAN
#pod =cut
# Build a new ZSCII codec object.
#
# Called as a class method with at most one argument, which may be:
#   * absent/undef - a Version 5 codec is built;
#   * a non-reference scalar - used as the version number;
#   * a hash reference - the keys read here are "version",
#     "extra_characters", "alphabet" and "alphabet_is_unicode".
#
# Returns the blessed codec.  Croaks if the version is not 5, 7, or 8, and
# confesses if the extra characters are too many, collide with an existing
# mapping, or lie above U+FFFF.
sub new {
  my ($class, $arg) = @_;

  # Normalize the argument into a hashref.  These branches are mutually
  # exclusive, so they form a single if/elsif chain.
  if (! defined $arg) {
    $arg = { version => 5 };
  } elsif (! ref $arg) {
    $arg = { version => $arg };
  }

  my $guts = { version => $arg->{version} };

  # A hashref given without a version is rejected with the same message as
  # any other unsupported version, rather than comparing undef numerically.
  my $version = $guts->{version};
  Carp::croak("only Version 5, 7, and 8 ZSCII are supported at present")
    unless defined $version
       and ($version == 5 or $version == 7 or $version == 8);

  $guts->{zscii} = { %DEFAULT_ZSCII };

  # Why is this an arrayref and not, like alphabets, a string?
  # Alphabets are strings because they're guaranteed to fit in bytestrings.
  # You can't put a ZSCII character over 0xFF in the alphabet, because it can't
  # be put in the story file's alphabet table!  By using a string, it's easy to
  # just pass in the alphabet from memory to/from the codec.  On the other
  # hand, the Unicode translation table stores Unicode codepoint values packed
  # into words, and it's not a good fit for use in the codec.  Maybe a
  # ZMachine::Util will be useful for packing/unpacking Unicode translation
  # tables.
  $guts->{extra} = $arg->{extra_characters}
                || \@DEFAULT_EXTRA;

  Carp::confess("Unicode translation table exceeds maximum length of 97")
    if @{ $guts->{extra} } > 97;

  # Extra characters are assigned consecutive ZSCII codes starting at 155.
  for my $offset (0 .. $#{ $guts->{extra} }) {
    my $zscii_char = chr(155 + $offset);

    Carp::confess("tried to add ambiguous Z->U mapping")
      if exists $guts->{zscii}{$zscii_char};

    my $u_char = $guts->{extra}[$offset];

    # Extra characters must go into the Unicode substitution table, which can
    # only represent characters with codepoints between 0 and 0xFFFF.  See
    # Z-Machine Spec v1.1 § 3.8.4.2.1
    Carp::confess("tried to add Unicode codepoint greater than U+FFFF")
      if ord($u_char) > 0xFFFF;

    $guts->{zscii}{$zscii_char} = $u_char;
  }

  # Build the reverse (Unicode -> ZSCII) map, refusing any Unicode character
  # that would be reachable from more than one ZSCII character.  Keys are
  # visited in sorted order so the check is deterministic.
  $guts->{zscii_for} = { };
  for my $zscii_char (sort keys %{ $guts->{zscii} }) {
    my $unicode_char = $guts->{zscii}{$zscii_char};

    Carp::confess("tried to add ambiguous U->Z mapping")
      if exists $guts->{zscii_for}{ $unicode_char };

    $guts->{zscii_for}{ $unicode_char } = $zscii_char;
  }

  my $self = bless $guts => $class;

  # The default alphabet is entirely made up of characters that are the same in
  # Unicode and ZSCII.  If a user wants to put "extra characters" into the
  # alphabet table, though, the alphabet should contain ZSCII values.  When
  # we're building a ZMachine::ZSCII using the contents of the story file's
  # alphabet table, that's easy.  If we're building a codec to *produce* a
  # story file, it's less trivial, because we don't want to think about the
  # specific ZSCII codepoints for the Unicode text we'll encode.
  #
  # We provide alphabet_is_unicode to let the user say "my alphabet is supplied
  # in Unicode, please convert it to ZSCII during construction."  -- rjbs,
  # 2013-01-19
  my $alphabet = $arg->{alphabet} || $DEFAULT_ALPHABET;

  # It's okay if the user supplies alphabet_is_unicode but not alphabet,
  # because the default alphabet is all characters with the same value in both
  # character sets!  -- rjbs, 2013-01-20
  $alphabet = $self->unicode_to_zscii($alphabet)
    if $arg->{alphabet_is_unicode};

  $self->{alphabet} = $alphabet;
  $self->{shortcut} = $class->_shortcuts_for( $self->{alphabet} );

  return $self;
}
#pod =method encode
#pod
#pod my $packed_zchars = $z->encode( $unicode_text );
#pod
#pod This method takes a string of text and encodes it to a bytestring of packed
#pod Z-characters.
#pod
#pod Internally, it converts the Unicode text to ZSCII, then to Z-characters, and
#pod then packs them. Before this processing, any native newline characters (the
#pod value of C<\n>) are converted to C<U+000D> to match the Z-Machine's use of
#pod character 0x00D for newline.
#pod
#pod =cut
# Encode Unicode text into a bytestring of packed Z-characters.
#
# Works on a private copy of the text: native newlines become U+000D, then
# the text goes through unicode_to_zscii, zscii_to_zchars and pack_zchars in
# turn.  The result of pack_zchars is returned.
sub encode {
  my $self = shift;
  my $text = shift;

  # Swap every native newline for a carriage return before conversion.
  $text =~ s/\n/\x0D/g;

  my $zscii_text = $self->unicode_to_zscii($text);
  my $zchar_text = $self->zscii_to_zchars($zscii_text);

  return $self->pack_zchars($zchar_text);
}
#pod =method decode
#pod
#pod my $text = $z->decode( $packed_zchars );
#pod
#pod This method takes a bytestring of packed Z-characters and returns a string of
#pod text.
#pod
#pod Internally, it unpacks the Z-characters, converts them to ZSCII, and then
#pod converts those to Unicode. Any ZSCII characters 0x00D are converted to the
view all matches for this distributionview release on metacpan - search on metacpan
( run in 6.379 seconds using v1.00-cache-2.02-grep-82fe00e-cpan-48ebf85a1963 )