Encode-DosHebrew

 view release on metacpan or  search on metacpan

lib/Encode/DosHebrew.pm  view on Meta::CPAN

=head1 NAME

Encode::DosHebrew - DOS Hebrew Encoding

=head1 SYNOPSIS

  use Encode;
  use Encode::DosHebrew;
  use File::BOM;

  # Hebrew word for "newspaper"
  my $dosTxt = "\x92\xA0\xFA\xED\x8F\xA7";

  open $outF, ">:utf8:via(File::BOM)", 'unicode.txt'
	  or die "can't write: $!\n";
  print $outF decode('DosHebrew', $dosTxt), "\n";
  close $outF;

=head1 ABSTRACT

This module implements a DOS 8-bit encoding of Hebrew, which includes vowels (nikud),
as well as pointed (dagesh) consonants, in addition to the standard consonants.  It
is a superset of Code page 862 (which includes only consonants).

Although data files exist that use this "DosHebrew" encoding, its origin is unclear,
and there are no known standards which describe it.

=head1 BUGS

Only the "decode" function is implemented at this time.

=head1 DESCRIPTION

For details on how to use this module, see L<Encode>.

=head1 SEE ALSO

Encode

=head1 AUTHOR

Tzadik Vanderhoof, E<lt>tzadikv@cpan.orgE<gt>

=cut

package Encode::DosHebrew;
use strict;
use warnings;
use base 'Encode::Encoding';
use feature ":5.10";
use Carp;

# Module version.
our $VERSION = '0.5';

# Register this class under the encoding name 'DosHebrew' (the name passed to
# decode() in the SYNOPSIS), using the Define method inherited from
# Encode::Encoding via "use base" above.
__PACKAGE__->Define('DosHebrew');

# Lookup table filled in by initDos2Uni: each key is a single DOS character
# and each value is the string of characters (built with hebChr) it maps to.
my %dos2uni;

# hebChr($c)
#
# Returns the single character whose code point is 0x0500 plus $c.
# Callers in this file pass a value parsed from two hex digits (0x00..0xFF),
# so $c acts as the low byte of a code point in the U+05xx range; for
# example, 0xD0 yields U+05D0.
sub hebChr {
	my $lowByte = shift;

	my $codePoint = 0x0500 + $lowByte;
	return chr $codePoint;
}

sub initDos2Uni {
	my $data = dos2UniData();
	open my $dataF, '<', \$data or die "Can't open data";
	while (<$dataF>) {
		my @x = m|\b([0-9a-f]{2})\b|ig or next;
		my $from = chr(hex(shift @x));
		my $to = join '', map { hebChr hex $_ } @x;
		$dos2uni{$from} = $to;
	}
	close $dataF;



( run in 2.135 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )