ZMachine

 view release on metacpan or  search on metacpan

lib/ZMachine/ZSCII.pm  view on Meta::CPAN

#pod =cut

# Constructor.  May be called with no argument (a Version 5 codec is built),
# with a plain scalar that is taken as the version number, or with a reference
# that is used as a hash of named arguments.  The keys read here are
# "version", "extra_characters", "alphabet", and "alphabet_is_unicode".
#
# Croaks if the version is not 5, 7, or 8.  Confesses if the extra character
# table is longer than 97 entries, contains a character above U+FFFF, or would
# make the ZSCII <-> Unicode mapping ambiguous in either direction.
#
# Returns the new object, blessed into $class.
sub new {
  my ($class, $arg) = @_;

  # Normalize the argument into a hashref.  The two cases are mutually
  # exclusive, so they are spelled as a single if/elsif chain rather than two
  # independent "if" statements.
  if (! defined $arg) {
    $arg = { version => 5 };
  } elsif (! ref $arg) {
    $arg = { version => $arg };
  }

  my $guts = { version => $arg->{version} };

  # A hashref with no "version" key leaves the version undefined; check for
  # that first so it is rejected without "uninitialized value" warnings from
  # the numeric comparisons.
  Carp::croak("only Version 5, 7, and 8 ZSCII are supported at present")
    unless defined($guts->{version})
       and (   $guts->{version} == 5
            or $guts->{version} == 7
            or $guts->{version} == 8 );

  # Start from a private copy of the default table so that adding the extra
  # characters below does not alter the shared default.
  $guts->{zscii} = { %DEFAULT_ZSCII };

  # Why is this an arrayref and not, like alphabets, a string?
  # Alphabets are strings because they're guaranteed to fit in bytestrings.
  # You can't put a ZSCII character over 0xFF in the alphabet, because it can't
  # be put in the story file's alphabet table!  By using a string, it's easy to
  # just pass in the alphabet from memory to/from the codec.  On the other
  # hand, the Unicode translation table stores Unicode codepoint values packed
  # into words, and it's not a good fit for use in the codec.  Maybe a
  # ZMachine::Util will be useful for packing/unpacking Unicode translation
  # tables.
  $guts->{extra} = $arg->{extra_characters}
                || \@DEFAULT_EXTRA;

  Carp::confess("Unicode translation table exceeds maximum length of 97")
    if @{ $guts->{extra} } > 97;

  # The extra characters are assigned consecutive ZSCII values starting at
  # 155, in the order in which they appear in the table.
  for my $index (0 .. $#{ $guts->{extra} }) {
    my $z_char = chr(155 + $index);

    Carp::confess("tried to add ambiguous Z->U mapping")
      if exists $guts->{zscii}{ $z_char };

    my $u_char = $guts->{extra}[$index];

    # Extra characters must go into the Unicode substitution table, which can
    # only represent characters with codepoints between 0 and 0xFFFF.  See
    # Z-Machine Spec v1.1 § 3.8.4.2.1
    Carp::confess("tried to add Unicode codepoint greater than U+FFFF")
      if ord($u_char) > 0xFFFF;

    $guts->{zscii}{ $z_char } = $u_char;
  }

  # Build the reverse (Unicode -> ZSCII) table, refusing any Unicode character
  # that is reachable from more than one ZSCII character.  The keys are sorted
  # so the table is always walked in the same order.
  $guts->{zscii_for} = { };
  for my $zscii_char (sort keys %{ $guts->{zscii} }) {
    my $unicode_char = $guts->{zscii}{$zscii_char};

    Carp::confess("tried to add ambiguous U->Z mapping")
      if exists $guts->{zscii_for}{ $unicode_char };

    $guts->{zscii_for}{ $unicode_char } = $zscii_char;
  }

  # Bless before dealing with the alphabet: converting a Unicode alphabet
  # below is done with a method call on the object.
  my $self = bless $guts => $class;

  # The default alphabet is entirely made up of characters that are the same in
  # Unicode and ZSCII.  If a user wants to put "extra characters" into the
  # alphabet table, though, the alphabet should contain ZSCII values.  When
  # we're building a ZMachine::ZSCII using the contents of the story file's
  # alphabet table, that's easy.  If we're building a codec to *produce* a
  # story file, it's less trivial, because we don't want to think about the
  # specific ZSCII codepoints for the Unicode text we'll encode.
  #
  # We provide alphabet_is_unicode to let the user say "my alphabet is supplied
  # in Unicode, please convert it to ZSCII during construction." -- rjbs,
  # 2013-01-19
  my $alphabet = $arg->{alphabet} || $DEFAULT_ALPHABET;

  # It's okay if the user supplies alphabet_is_unicode but not alphabet,
  # because the default alphabet is all characters with the same value in both
  # character sets! -- rjbs, 2013-01-20
  $alphabet = $self->unicode_to_zscii($alphabet)
    if $arg->{alphabet_is_unicode};

  $self->{alphabet} = $alphabet;
  $self->{shortcut} = $class->_shortcuts_for( $self->{alphabet} );

  return $self;
}

#pod =method encode
#pod
#pod   my $packed_zchars = $z->encode( $unicode_text );
#pod
#pod This method takes a string of text and encodes it to a bytestring of packed
#pod Z-characters.
#pod
#pod Internally, it converts the Unicode text to ZSCII, then to Z-characters, and
#pod then packs them.  Before this processing, any native newline characters (the
#pod value of C<\n>) are converted to C<U+000D> to match the Z-Machine's use of
#pod character 0x0D for newline.
#pod
#pod =cut

# Encode a string of text into a bytestring of packed Z-characters.
#
# The text is first converted to ZSCII, then to Z-characters, and finally
# packed; each step is delegated to another method on the same object.  The
# caller's string is never modified: all work is done on a local copy.
sub encode {
  my $self   = shift;
  my ($text) = @_;

  # Native newlines become character 0x0D before any other processing.
  (my $normalized = $text) =~ tr/\n/\x0D/;

  # Each stage is captured in a scalar so that every method is called in
  # scalar context.
  my $as_zscii  = $self->unicode_to_zscii($normalized);
  my $as_zchars = $self->zscii_to_zchars($as_zscii);
  my $packed    = $self->pack_zchars($as_zchars);

  return $packed;
}

#pod =method decode
#pod
#pod   my $text = $z->decode( $packed_zchars );
#pod
#pod This method takes a bytestring of packed Z-characters and returns a string of
#pod text.
#pod
#pod Internally, it unpacks the Z-characters, converts them to ZSCII, and then
#pod converts those to Unicode.  Any ZSCII characters 0x00D are converted to the

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 6.379 seconds using v1.00-cache-2.02-grep-82fe00e-cpan-48ebf85a1963 )