MARC-File-MARCMaker

 view release on metacpan or  search on metacpan

lib/MARC/File/MARCMaker.pm  view on Meta::CPAN

=cut

sub _char2maker { #deal with charmap default
    my @marc_string = split (//, shift);
    my $charmap = shift; #|| $charset; #add default value
    my $maker_string = join ('', map {${$charmap}{$_} } @marc_string);
    #replace html-style entities (´) with code in curly braces ({acute})
    while ($maker_string =~ s/(&)([^ ]{1,7}?)(;)/{$2}/o) {}

    return $maker_string;
} #_char2maker

######################


=head2 Default charset

usmarc_default() -- Originally from MARC.pm. Offers default mnemonics for character encoding and decoding.

Used by _maker2char.

This perhaps should be an internal _usmarc_default().

=cut

sub usmarc_default { # rec
    my @hexchar = (0x00..0x1a,0x1c,0x7f..0x8c,0x8f..0xa0,0xaf,0xbb,
           0xbe,0xbf,0xc7..0xdf,0xfc,0xfd,0xff);
    my %inchar = map {sprintf ("%2.2X",int $_), chr($_)} @hexchar;

    $inchar{esc} = chr(0x1b);        # escape
    $inchar{dollar} = chr(0x24);    # dollar sign
    $inchar{curren} = chr(0x24);    # dollar sign - alternate
    $inchar{24} = chr(0x24);        # dollar sign - alternate
    $inchar{bsol} = chr(0x5c);        # back slash (reverse solidus)
    $inchar{lcub} = chr(0x7b);        # opening curly brace
    $inchar{rcub} = "}";        # closing curly brace - part 1
    $inchar{joiner} = chr(0x8d);    # zero width joiner
    $inchar{nonjoin} = chr(0x8e);    # zero width non-joiner
    $inchar{Lstrok} = chr(0xa1);    # latin capital letter l with stroke
    $inchar{Ostrok} = chr(0xa2);    # latin capital letter o with stroke
    $inchar{Dstrok} = chr(0xa3);    # latin capital letter d with stroke
    $inchar{THORN} = chr(0xa4);        # latin capital letter thorn (icelandic)
    $inchar{AElig} = chr(0xa5);        # latin capital letter AE
    $inchar{OElig} = chr(0xa6);        # latin capital letter OE
    $inchar{softsign} = chr(0xa7);    # modifier letter soft sign
    $inchar{middot} = chr(0xa8);    # middle dot
    $inchar{flat} = chr(0xa9);        # musical flat sign
    $inchar{reg} = chr(0xaa);        # registered sign
    $inchar{plusmn} = chr(0xab);    # plus-minus sign
    $inchar{Ohorn} = chr(0xac);        # latin capital letter o with horn
    $inchar{Uhorn} = chr(0xad);        # latin capital letter u with horn
    $inchar{mlrhring} = chr(0xae);    # modifier letter right half ring (alif)
    $inchar{mllhring} = chr(0xb0);    # modifier letter left half ring (ayn)
    $inchar{lstrok} = chr(0xb1);    # latin small letter l with stroke
    $inchar{ostrok} = chr(0xb2);    # latin small letter o with stroke
    $inchar{dstrok} = chr(0xb3);    # latin small letter d with stroke
    $inchar{thorn} = chr(0xb4);        # latin small letter thorn (icelandic)
    $inchar{aelig} = chr(0xb5);        # latin small letter ae
    $inchar{oelig} = chr(0xb6);        # latin small letter oe
    $inchar{hardsign} = chr(0xb7);    # modifier letter hard sign
    $inchar{inodot} = chr(0xb8);    # latin small letter dotless i
    $inchar{pound} = chr(0xb9);        # pound sign
    $inchar{eth} = chr(0xba);        # latin small letter eth
    $inchar{ohorn} = chr(0xbc);        # latin small letter o with horn
    $inchar{uhorn} = chr(0xbd);        # latin small letter u with horn
    $inchar{deg} = chr(0xc0);        # degree sign
    $inchar{scriptl} = chr(0xc1);    # latin small letter script l
    $inchar{phono} = chr(0xc2);        # sound recording copyright
    $inchar{copy} = chr(0xc3);        # copyright sign
    $inchar{sharp} = chr(0xc4);        # sharp
    $inchar{iquest} = chr(0xc5);    # inverted question mark
    $inchar{iexcl} = chr(0xc6);        # inverted exclamation mark
    $inchar{hooka} = chr(0xe0);        # combining hook above
    $inchar{grave} = chr(0xe1);        # combining grave
    $inchar{acute} = chr(0xe2);        # combining acute
    $inchar{circ} = chr(0xe3);        # combining circumflex
    $inchar{tilde} = chr(0xe4);        # combining tilde
    $inchar{macr} = chr(0xe5);        # combining macron
    $inchar{breve} = chr(0xe6);        # combining breve
    $inchar{dot} = chr(0xe7);        # combining dot above
    $inchar{diaer} = chr(0xe8);        # combining diaeresis
    $inchar{uml} = chr(0xe8);        # combining umlaut
    $inchar{caron} = chr(0xe9);        # combining hacek
    $inchar{ring} = chr(0xea);        # combining ring above
    $inchar{llig} = chr(0xeb);        # combining ligature left half
    $inchar{rlig} = chr(0xec);        # combining ligature right half
    $inchar{rcommaa} = chr(0xed);    # combining comma above right
    $inchar{dblac} = chr(0xee);        # combining double acute
    $inchar{candra} = chr(0xef);    # combining candrabindu
    $inchar{cedil} = chr(0xf0);        # combining cedilla
    $inchar{ogon} = chr(0xf1);        # combining ogonek
    $inchar{dotb} = chr(0xf2);        # combining dot below
    $inchar{dbldotb} = chr(0xf3);    # combining double dot below
    $inchar{ringb} = chr(0xf4);        # combining ring below
    $inchar{dblunder} = chr(0xf5);    # combining double underscore
    $inchar{under} = chr(0xf6);        # combining underscore
    $inchar{commab} = chr(0xf7);    # combining comma below
    $inchar{rcedil} = chr(0xf8);    # combining right cedilla
    $inchar{breveb} = chr(0xf9);    # combining breve below
    $inchar{ldbltil} = chr(0xfa);    # combining double tilde left half
    $inchar{rdbltil} = chr(0xfb);    # combining double tilde right half
    $inchar{commaa} = chr(0xfe);    # combining comma above
    if ($MARC::DEBUG) {
        foreach my $str (sort keys %inchar) {
            printf "%s = %x\n", $str, ord($inchar{$str});
        }
    }
    return \%inchar;
} #usmarc_default

###################################################

=head2 ustext_default

ustext_default -- Originally from MARC.pm. Offers default mnemonics for character encoding and decoding.

Used by _char2maker.

This perhaps should be an internal _ustext_default().

=cut

sub ustext_default {
    my @hexchar = (0x00..0x1a,0x1c,0x7f..0x8c,0x8f..0xa0,0xaf,0xbb,
           0xbe,0xbf,0xc7..0xdf,0xfc,0xfd,0xff);
    my %outchar = map {chr($_), sprintf ("{%2.2X}",int $_)} @hexchar;

    my @ascchar = map {chr($_)} (0x20..0x23,0x25..0x7a,0x7c,0x7e);
    foreach my $asc (@ascchar) { $outchar{$asc} = $asc;}

    $outchar{chr(0x1b)} = '{esc}';    # escape
    $outchar{chr(0x24)} = '{dollar}';    # dollar sign
    $outchar{chr(0x5c)} = '{bsol}';    # back slash (reverse solidus)
    $outchar{chr(0x7b)} = '{lcub}';    # opening curly brace
    $outchar{chr(0x7d)} = '{rcub}';    # closing curly brace
    $outchar{chr(0x8d)} = '{joiner}';    # zero width joiner
    $outchar{chr(0x8e)} = '{nonjoin}';    # zero width non-joiner
    $outchar{chr(0xa1)} = '{Lstrok}';    # latin capital letter l with stroke
    $outchar{chr(0xa2)} = '{Ostrok}';    # latin capital letter o with stroke
    $outchar{chr(0xa3)} = '{Dstrok}';    # latin capital letter d with stroke
    $outchar{chr(0xa4)} = '{THORN}';    # latin capital letter thorn (icelandic)
    $outchar{chr(0xa5)} = '{AElig}';    # latin capital letter AE
    $outchar{chr(0xa6)} = '{OElig}';    # latin capital letter OE
    $outchar{chr(0xa7)} = '{softsign}';    # modifier letter soft sign
    $outchar{chr(0xa8)} = '{middot}';    # middle dot
    $outchar{chr(0xa9)} = '{flat}';    # musical flat sign
    $outchar{chr(0xaa)} = '{reg}';    # registered sign
    $outchar{chr(0xab)} = '{plusmn}';    # plus-minus sign
    $outchar{chr(0xac)} = '{Ohorn}';    # latin capital letter o with horn
    $outchar{chr(0xad)} = '{Uhorn}';    # latin capital letter u with horn
    $outchar{chr(0xae)} = '{mlrhring}';    # modifier letter right half ring (alif)
    $outchar{chr(0xb0)} = '{mllhring}';    # modifier letter left half ring (ayn)
    $outchar{chr(0xb1)} = '{lstrok}';    # latin small letter l with stroke
    $outchar{chr(0xb2)} = '{ostrok}';    # latin small letter o with stroke
    $outchar{chr(0xb3)} = '{dstrok}';    # latin small letter d with stroke
    $outchar{chr(0xb4)} = '{thorn}';    # latin small letter thorn (icelandic)
    $outchar{chr(0xb5)} = '{aelig}';    # latin small letter ae
    $outchar{chr(0xb6)} = '{oelig}';    # latin small letter oe
    $outchar{chr(0xb7)} = '{hardsign}';    # modifier letter hard sign
    $outchar{chr(0xb8)} = '{inodot}';    # latin small letter dotless i
    $outchar{chr(0xb9)} = '{pound}';    # pound sign
    $outchar{chr(0xba)} = '{eth}';    # latin small letter eth
    $outchar{chr(0xbc)} = '{ohorn}';    # latin small letter o with horn
    $outchar{chr(0xbd)} = '{uhorn}';    # latin small letter u with horn
    $outchar{chr(0xc0)} = '{deg}';    # degree sign
    $outchar{chr(0xc1)} = '{scriptl}';    # latin small letter script l
    $outchar{chr(0xc2)} = '{phono}';    # sound recording copyright
    $outchar{chr(0xc3)} = '{copy}';    # copyright sign
    $outchar{chr(0xc4)} = '{sharp}';    # sharp
    $outchar{chr(0xc5)} = '{iquest}';    # inverted question mark
    $outchar{chr(0xc6)} = '{iexcl}';    # inverted exclamation mark
    $outchar{chr(0xe0)} = '{hooka}';    # combining hook above
    $outchar{chr(0xe1)} = '{grave}';    # combining grave
    $outchar{chr(0xe2)} = '{acute}';    # combining acute
    $outchar{chr(0xe3)} = '{circ}';    # combining circumflex
    $outchar{chr(0xe4)} = '{tilde}';    # combining tilde
    $outchar{chr(0xe5)} = '{macr}';    # combining macron
    $outchar{chr(0xe6)} = '{breve}';    # combining breve
    $outchar{chr(0xe7)} = '{dot}';    # combining dot above
    $outchar{chr(0xe8)} = '{uml}';    # combining diaeresis (umlaut)
    $outchar{chr(0xe9)} = '{caron}';    # combining hacek
    $outchar{chr(0xea)} = '{ring}';    # combining ring above
    $outchar{chr(0xeb)} = '{llig}';    # combining ligature left half
    $outchar{chr(0xec)} = '{rlig}';    # combining ligature right half
    $outchar{chr(0xed)} = '{rcommaa}';    # combining comma above right
    $outchar{chr(0xee)} = '{dblac}';    # combining double acute
    $outchar{chr(0xef)} = '{candra}';    # combining candrabindu
    $outchar{chr(0xf0)} = '{cedil}';    # combining cedilla
    $outchar{chr(0xf1)} = '{ogon}';    # combining ogonek
    $outchar{chr(0xf2)} = '{dotb}';    # combining dot below
    $outchar{chr(0xf3)} = '{dbldotb}';    # combining double dot below
    $outchar{chr(0xf4)} = '{ringb}';    # combining ring below
    $outchar{chr(0xf5)} = '{dblunder}';    # combining double underscore
    $outchar{chr(0xf6)} = '{under}';    # combining underscore
    $outchar{chr(0xf7)} = '{commab}';    # combining comma below
    $outchar{chr(0xf8)} = '{rcedil}';    # combining right cedilla
    $outchar{chr(0xf9)} = '{breveb}';    # combining breve below
    $outchar{chr(0xfa)} = '{ldbltil}';    # combining double tilde left half
    $outchar{chr(0xfb)} = '{rdbltil}';    # combining double tilde right half
    $outchar{chr(0xfe)} = '{commaa}';    # combining comma above
    if ($MARC::DEBUG) {
        foreach my $num (sort keys %outchar) {
            printf "%x = %s\n", ord($num), $outchar{$num};
        }
    }
    return \%outchar;
} #ustext_default


#################################################################### 

=head2 _maker2char default

_maker2char() -- Translates MARCMaker encoded character into MARC-8 character.

=cut

sub _maker2char { # rec
    my $marc_string = shift;



( run in 1.730 second using v1.01-cache-2.11-cpan-71847e10f99 )