Bundle-PBib
view release on metacpan or search on metacpan
lib/Biblio/bp/lib/bp-cs-apple.pl view on Meta::CPAN
'00AB', 199, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
'00BB', 200, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
'2026', 201, # HORIZONTAL ELLIPSIS
'00A0', 202, # NO-BREAK SPACE
'00C0', 203, # LATIN CAPITAL LETTER A WITH GRAVE
'00C3', 204, # LATIN CAPITAL LETTER A WITH TILDE
'00D5', 205, # LATIN CAPITAL LETTER O WITH TILDE
'0152', 206, # LATIN CAPITAL LIGATURE OE
'0153', 207, # LATIN SMALL LIGATURE OE
'2013', 208, # EN DASH
'2014', 209, # EM DASH
'201C', 210, # LEFT DOUBLE QUOTATION MARK
'201D', 211, # RIGHT DOUBLE QUOTATION MARK
'2018', 212, # LEFT SINGLE QUOTATION MARK
'2019', 213, # RIGHT SINGLE QUOTATION MARK
'00F7', 214, # DIVISION SIGN
'25CA', 215, # LOZENGE
'00FF', 216, # LATIN SMALL LETTER Y WITH DIAERESIS
'0178', 217, # LATIN CAPITAL LETTER Y WITH DIAERESIS
'2044', 218, # FRACTION SLASH
'00A4', 219, # CURRENCY SIGN
'2039', 220, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
'203A', 221, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
'FB01', 222, # LATIN SMALL LIGATURE FI
'FB02', 223, # LATIN SMALL LIGATURE FL
'2021', 224, # DOUBLE DAGGER
'00B7', 225, # MIDDLE DOT
'201A', 226, # SINGLE LOW-9 QUOTATION MARK
'201E', 227, # DOUBLE LOW-9 QUOTATION MARK
'2030', 228, # PER MILLE SIGN
'00C2', 229, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
'00CA', 230, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
'00C1', 231, # LATIN CAPITAL LETTER A WITH ACUTE
'00CB', 232, # LATIN CAPITAL LETTER E WITH DIAERESIS
'00C8', 233, # LATIN CAPITAL LETTER E WITH GRAVE
'00CD', 234, # LATIN CAPITAL LETTER I WITH ACUTE
'00CE', 235, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
'00CF', 236, # LATIN CAPITAL LETTER I WITH DIAERESIS
'00CC', 237, # LATIN CAPITAL LETTER I WITH GRAVE
'00D3', 238, # LATIN CAPITAL LETTER O WITH ACUTE
'00D4', 239, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
'E000', 240, # APPLE LOGO
'00D2', 241, # LATIN CAPITAL LETTER O WITH GRAVE
'00DA', 242, # LATIN CAPITAL LETTER U WITH ACUTE
'00DB', 243, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
'00D9', 244, # LATIN CAPITAL LETTER U WITH GRAVE
'0131', 245, # LATIN SMALL LETTER DOTLESS I
'02C6', 246, # MODIFIER LETTER CIRCUMFLEX ACCENT
'02DC', 247, # SMALL TILDE
'00AF', 248, # MACRON
'02D8', 249, # BREVE
'02D9', 250, # DOT ABOVE (Mandarin Chinese light tone)
'02DA', 251, # RING ABOVE
'00B8', 252, # CEDILLA
'02DD', 253, # DOUBLE ACUTE ACCENT
'02DB', 254, # OGONEK
'02C7', 255, # CARON (Mandarin Chinese third tone)
);
# Table done.
$unicode = '';
$repl = '';
$can = '';
$eb_eval_fromcanon = '';
$eb_eval_tocanon = '';
$eb_nomapC = '';
$eb_nomapA = '';
$eb_mapC = '';
$eb_mapA = '';
#
# Build the eval string for the fromcanon code.
#
# For each 8bit code, we either:
# 1) don't have a character for this code. So we zap and complain.
# 2) we do know it, so we translate them all at once after we're done
# with all the ones we don't know.
#
foreach $can (128..255) {
$unicode = &bib'decimal_to_unicode($can);
$repl = pack("C", $can);
if (defined $umap{$unicode}) {
$eb_mapC .= $repl;
$eb_mapA .= pack("C", $umap{$unicode});
} else {
$eb_nomapC .= $repl;
$eb_eval_fromcanon .= "tr/$repl//d && \&bib'gotwarn(\"Can't convert "
. &bib'unicode_name($unicode) . " to Apple\");\n";
}
}
substr($eb_eval_fromcanon,0,0) = "if (/[$eb_nomapC]/) {\n";
$eb_eval_fromcanon .= "}\ntr/$eb_mapC/$eb_mapA/;\n";
#
# Build the eval string for the tocanon code.
#
# nomapA just means there isn't a direct 8bit replacement. We just insert
# the extended character.
#
foreach $unicode (keys %umap) {
next if $unicode =~ /^00/;
$repl = pack("C", $umap{$unicode});
$eb_nomapA .= $repl;
$eb_eval_tocanon .= "s/$repl/$bib'cs_ext$unicode/g;\n";
}
substr($eb_eval_tocanon,0,0) = "if (/[$eb_nomapA]/) {\n";
$eb_eval_tocanon .= "}\ntr/$eb_mapA/$eb_mapC/;\n";
#####################
sub tocanon {
local($_, $protect) = @_;
&bib'panic("cs-apple tocanon called with no arguments!") unless defined $_;
eval $eb_eval_tocanon;
$_;
}
######
sub fromcanon {
local($_, $protect) = @_;
&bib'panic("cs-apple fromcanon called with no arguments!") unless defined $_;
# 8 bit mappings. This variable is set up at package load time.
# The algorithm goes as follows:
# step 1: Zap and complain about any 8bit characters we don't map.
# This is done with a tr/<character>//d for each character.
# step 2: Use tr/<canons>/<apples>/ to translate all the two-way
# mapped characters right across.
eval $eb_eval_fromcanon;
return $_ unless /$bib'cs_escape/o;
# The standard 7bit map.
1 while s/${bib'cs_ext}00([0-7].)/pack("C", hex($1))/ge;
while (/${bib'cs_ext}(....)/) {
$unicode = $1;
defined $umap{$unicode}
&& s/${bib'cs_ext}$unicode/pack("C", $umap{$unicode})/ge
&& next;
&bib'gotwarn("Can't convert ".&bib'unicode_name($unicode)." to Apple");
s/${bib'cs_ext}$unicode//g;
}
while (/${bib'cs_meta}(....)/) {
$repl = $1;
&bib'gotwarn("Can't convert ".&bib'meta_name($repl)." to Apple");
s/${bib'cs_meta}$repl//g;
}
$_;
}
#######################
# end of package
#######################
1;
( run in 1.368 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )