Bundle-PBib
view release on metacpan or search on metacpan
lib/Biblio/bp/lib/bp-cs-troff.pl view on Meta::CPAN
#
# bibliography package for Perl
#
# troff character set.
#
# Dana Jacobsen (dana@acm.org)
# 13 January 1995, rewrote 20 November 1995 for new canon.
# Last modified on 23 March 1996.
package bp_cs_troff;
######
$bib'charsets{'troff', 'i_name'} = 'troff';
$bib'charsets{'troff', 'tocanon'} = "bp_cs_troff'tocanon";
$bib'charsets{'troff', 'fromcanon'} = "bp_cs_troff'fromcanon";
$bib'charsets{'troff', 'toesc'} = '[\\\\]';
$bib'charsets{'troff', 'fromesc'} = "[\\\\\200-\377]|${bib'cs_ext}|${bib'cs_meta}";
######
$opt_doublebs = 1;
# variables used throughout the package
$unicode = '';
$mine = '';
$can = '';
# Rather than defining all our maps and running code for reverse maps at
# load time, we're going to embed them in functions. When tocanon or
# fromcanon get called, we do the init if we haven't already. This should
# save startup time -- especially if they never actually call our function!
# In the troff code in particular, the tocanon code needs a lot of reverse
# maps and eval code. If we're doing xyz->troff, we don't need to load
# all of that.
$cs_init = 0;
$cs_to_init = 0;
$cs_fr_init = 0;
######
sub init_cs {
# This uses the Roman-8 mapping for the rarer Latin-1 characters.
# XXXXX We need to think about which particular mapping we think is
# best for the characters that have multiple mappings. We might
# even want options for groff, Roman-8, -ms, etc.
%charmap = (
'00A0', '\0',
'00A1', '\(r!',
'00A2', '\(ct',
'00A3', '\(Po',
'00A4', '\(Cs',
'00A5', '\(Ye',
'00A6', '\(bb',
'00A7', '\(sc',
'00A8', '\(ad',
'00A9', '\(co',
'00AA', '\(Of',
'00AB', '\(Fo',
'00AC', '\(no',
'00AD', '\(hy',
'00AE', '\(rg',
'00AF', '\(a-',
'00B0', '\(de',
'00B1', '\(+-',
'00B2', '\(S2',
'00B3', '\(S3',
'00B4', '\(aa',
'00B5', '\(*m',
'00B6', '\(ps',
'00B7', '\(md',
'00B8', '\(ac',
'00B9', '\(S1',
'00BA', '\(Om',
'00BB', '\(Fc',
'00BC', '\(14',
'00BD', '\(12',
'00BE', '\(34',
'00BF', '\(r?',
'00C0', 'A\*`',
'00C1', 'A\*\'',
'00C2', 'A\*^',
'00C3', 'A\*~',
'00C4', 'A\*:',
lib/Biblio/bp/lib/bp-cs-troff.pl view on Meta::CPAN
'00FB', 'u\*^',
'00FC', 'u\*:',
'00FD', 'y\*\'',
'00FE', '\(Tp',
'00FF', 'y\*:',
'010D', 'c\*v',
'015F', 's\*,',
'017E', 'z\*v',
'2002', '\ ',
'2003', '\ \ ',
'2007', '\|', # I think this is wrong. It's a "numsp" Number Space.
'2009', '\^',
'2014', '\-',
'201C', '\*Q',
'201D', '\*U',
);
%metamap = (
'0302', '\u\s-3', # superscript
'0312', '\d\s-3', # subscript
'030F', '\s3\d',
'031F', '\s3\u',
);
%fontmap = (
'0101', '\fR',
'0102', '\fI',
'0103', '\fB',
'0104', '\fC',
'0110', '\fP', # simplistic, but it should work.
'0111', '\f1', # we just go to font 1 when they want the previous font.
'0112', '\f1', # or they end fonts.
'0113', '\f1',
'0114', '\f1',
);
$cs_init = 1;
}
sub init_cs_fr {
&init_cs unless $cs_init;
# We're not using any eval strings at the moment, so there isn't anything
# to do here.
$cs_fr_init = 1;
}
sub init_cs_to {
&init_cs unless $cs_init;
# Build up a search string to do the reverse map.
$cmap_eval = '';
#$cmap_from_eval = '';
%rmap = ();
$mineE = '';
# Step 1: Build a reverse map
while (($unicode, $mine) = each %charmap) {
$rmap{$mine} = &bib'unicode_to_canon( $unicode );
}
# Step 2: walk through the keys in sorted order
# (sigh, without a tree, this is still as slow as a dog)
foreach $mine (sort keys %rmap) {
$can = $rmap{$mine};
$mineE = $mine;
$mineE =~ s/(\W)/\\$1/g;
if ( $mine !~ /\\\(../ && $mine !~ /.\\\*./ ) {
$cmap_eval .= "s/$mineE/$can/g;\n";
}
# This isn't being used right now.
#$cmap_from_eval .= "s/$can/$mineE/g;\n";
}
# Leave rmap
# These are characters that need to be mapped only in the to mapping.
# There are a zillion different ways to write each symbol in troff, one for
# each macro package and each implementation.
# Nightmare 1: one troff character maps to different entities. Example:
# (groff) \(Cs --> '00A4 CURRENCY SIGN
# (roman8) \(Cs --> '2660 BLACK SPADE SUIT
# Nightmare 2: one entity maps to multiple characters, but no one of them
# is supported by a large group of implementations.
%chartos = (
'A\*a', '00C5',
'a\*a', '00E5',
'A\*o', '00C5',
'a\*o', '00E5',
'O\*/', '00D8',
'o\*/', '00F8',
'\*CC', '010C',
'\*Cc', '010D',
'\*CE', '011A',
'\*Ce', '011B',
'\*CL', '013D',
'\*Cl', '013E',
'\*CN', '0147',
'\*Cn', '0148',
'\*?', '00BF',
'\*!', '00A1',
'\(n~', '00F1',
);
$cmap_to_eval = '';
foreach $mine (sort keys %chartos) {
$can = &bib'unicode_to_canon( $chartos{$mine} );
$mineE = $mine;
$mineE =~ s/(\W)/\\$1/g;
if ( $mine !~ /\\\(../ && $mine !~ /.\\\*./ ) {
$cmap_to_eval .= "s/$mineE/$can/g;\n";
} else {
# Mapped up front with the rest.
if (defined $rmap{$mine}) {
&bib'goterror("Error in troff tables -- duplicate entry for $mine.");
}
$rmap{$mine} = $can;
}
}
$cs_to_init = 1;
}
#####################
sub tocanon {
local($_, $protect) = @_;
&bib'panic("cs-troff tocanon called with no arguments!") unless defined $_;
# always check to see if we have any characters to change
# (with our toesc search string, is this first check necessary?)
return $_ unless /\\/;
# do this even if we don't have opt_doublebs on
s/\\\\/\\/g;
&init_cs_to unless $cs_to_init;
study;
# Check for accents of the form \(xx and x\*x
if (/\\[(*]/) {
while (/(\\\(..)/) {
$repl = $1;
if (!defined $rmap{$repl}) {
&bib'gotwarn("Unknown troff special $repl");
$can = '';
} else {
$can = $rmap{$repl};
}
$repl =~ s/(\W)/\\$1/g;
s/$repl/$can/g;
}
# Next check for all characters of the form x\*x
while (/(.\\\*.)/) {
$repl = $1;
if (!defined $rmap{$repl}) {
next if s/\\\*Q/${bib'cs_ext}201C/go;
next if s/\\\*U/${bib'cs_ext}201D/go;
&bib'gotwarn("Unknown troff special $repl");
$can = '';
} else {
$can = $rmap{$repl};
}
$repl =~ s/(\W)/\\$1/g;
s/$repl/$can/g;
}
return $_ unless /\\/;
}
eval $cmap_eval;
return $_ unless /\\/;
# OK, they've got something fairly weird.
# Handle the different ways of specifying characters
eval $cmap_to_eval;
return $_ unless /\\/;
if (/\\f[123RIBP]/) {
# font changes
while (/\\f([123RIBP])/) {
$repl = $1;
$repl eq 'P' && ($mine = $bib'cs_meta . '0110');
$repl =~ /[1R]/ && ($mine = $bib'cs_meta . '0101');
$repl =~ /[2I]/ && ($mine = $bib'cs_meta . '0102');
$repl =~ /[3B]/ && ($mine = $bib'cs_meta . '0103');
s/\\f$repl/$mine/g;
}
$_ = &bib'font_check($_);
}
while (($unicode, $mine) = each %metamap) {
$mine =~ s/(\W)/\\$1/g;
s/$mine/${bib'cs_meta}$can/g;
}
return $_ unless /\\/;
# Last of all, the escape character. First we check to see if there is
# anything else. We can't delete it because of the way troff does it's
# coding.
if (/\\[^e]/) {
&bib'gotwarn("Unknown troff characters in '$_'");
}
# Then convert the escape character
s/\\e/\\/g;
$_;
}
######
sub fromcanon {
local($_, $protect) = @_;
local($repl);
&bib'panic("cs-troff fromcanon called with no arguments!") unless defined $_;
s/\\/\\e/g;
# tr/\200-\237//d && &bib'gotwarn("Zapped chars.");
if (/[\200-\237]/) {
while (/([\200-\237])/) {
$repl = $1;
$unicode = &bib'canon_to_unicode($repl);
&bib'gotwarn("Can't convert ".&bib'unicode_name($unicode)." to troff");
s/$repl//g;
}
}
&init_cs_fr unless $cs_fr_init;
# Which one of these to use probably depends on the frequency of
# special characters. The first method will be best with only one
# or two, but the second is better if there are a lot.
while (/([\240-\377])/g) {
$repl = $1;
$unicode = &bib'canon_to_unicode($repl);
s/$repl/$charmap{$unicode}/g;
}
# Note that the definition of cmap_from_eval is now commented out above.
#if (/[\240-\377]/) {
# eval $cmap_from_eval;
#}
# should we make the output have double backslashes?
$opt_doublebs && s/\\/\\\\/g;
# Maybe we can go now?
return $_ unless /$bib'cs_escape/o;
while (/${bib'cs_ext}(....)/) {
$unicode = $1;
if ($unicode =~ /^00[0-7]/) {
1 while s/${bib'cs_ext}00([0-7].)/pack("C", hex($1))/ge;
next;
}
defined $charmap{$unicode} && s/${bib'cs_ext}$unicode/$charmap{$unicode}/g
&& next;
$can = &bib'unicode_approx($repl);
defined $can && s/$bib'cs_ext$repl/$can/g && next;
&bib'gotwarn("Can't convert ".&bib'unicode_name($unicode)." to troff");
s/${bib'cs_ext}$unicode//g;
}
while (/${bib'cs_meta}(....)/) {
$repl = $1;
defined $fontmap{$repl} && s/${bib'cs_meta}$repl/$fontmap{$repl}/g
&& next;
defined $metamap{$repl} && s/${bib'cs_meta}$repl/$metamap{$repl}/g
&& next;
$can = &bib'meta_approx($repl);
defined $can && s/$bib'cs_meta$repl/$can/g && next;
&bib'gotwarn("Can't convert ".&bib'meta_name($repl)." to troff");
s/${bib'cs_meta}$repl//g;
}
$_;
}
#######################
# end of package
#######################
1;
( run in 0.575 second using v1.01-cache-2.11-cpan-99c4e6809bf )