Bundle-PBib
view release on metacpan or search on metacpan
lib/Biblio/bp/lib/bp-p-cs.pl view on Meta::CPAN
#
# bibliography package for Perl
#
# Character set common variables and routines
#
# Dana Jacobsen (dana@acm.org)
# 18 November 1995 (last modified 17 March 1996)
# for bib'nocharset which calls fromcanon:
require "bp-cs-none.pl";
######
#
# Return canonical character for a unicode hex string.
#
sub unicode_to_canon {
local($hex) = @_;
$hex =~ tr/a-f/A-F/;
# XXXXX Should we prepend '0' characters if we don't have 4 digits?
if ($hex !~ /^[\dA-F]{4}$/) {
&bib'gotwarn("Invalid Unicode character: $hex");
return '';
}
if ($hex =~ /00(..)/) {
return pack("C", hex($1));
}
return $bib'cs_ext . $hex;
}
sub canon_to_unicode {
local($can) = @_;
local($hex);
if (length($can) == 1) {
$hex = sprintf("%2lx", ord($can));
$hex =~ tr/a-f /A-F0/;
return( '00' . $hex );
}
if ($can =~ /$bib'cs_ext(....)/) {
$hex = $1;
$hex =~ tr/a-f/A-F/;
return $hex;
}
if ($can eq $bib'cs_char_escape) {
return &bib'canon_to_unicode($bib'cs_escape);
}
return &bib'gotwarn("Can't convert $can to Unicode");
}
sub decimal_to_unicode {
local($num) = @_;
local($hex);
if ($num < 256) {
$hex = sprintf("00%2lx", $num);
} elsif ($num < 65536) {
local($div) = $num / 256;
local($high) = int($div);
local($low) = 256 * ($div - $high);
$hex = sprintf("%2lx%2lx", $high, $low);
} else {
return &bib'gotwarn("Illegal number $num given to decimal_to_unicode");
}
$hex =~ tr/a-f /A-F0/;
$hex;
}
sub unicode_to_decimal {
local($uni) = @_;
return &bib'gotwarn("Illegal unicode length: $uni") unless length($uni) == 4;
return &bib'gotwarn("Illegal unicode string: $uni") if $uni =~ /[^\da-fA-F]/;
hex($uni);
}
sub unicode_name {
local($hex) = @_;
local($name);
# For now, just print hex value
$name = "Unicode '$hex'";
$name;
}
sub meta_name {
local($hex) = @_;
local($name);
# For now, just print hex value
$name = "Meta '$hex'";
$name;
}
# Oh boy, this is getting really complicated.
#
# We have an approx table set up, which says that one can approximate XXXX
# by YYYY, where presumably YYYY is easier. There shouldn't be any loops,
# so programs can recurse through the table.
#
# That's for the meta codes. For the unicode approx, we just have a
# string. This allows multiple character approximations.
#
# XXXXX Think about C3's idea of multiple approximations.
#
# A map of 0000 means that it maps to the null string -- our "approximation"
# is to get rid of it. This is what we can do if it isn't terribly harmful
# to remove it.
sub meta_approx {
local($orig) = @_;
require "${glb_bpprefix}p-cstab.pl" unless defined %bib'mapprox_tab;
if (defined $mapprox_tab{$orig}) {
return '' if $mapprox_tab{$orig} eq '0000';
return "${bib'cs_meta}$mapprox_tab{$orig}";
}
undef;
}
sub unicode_approx {
local($orig) = @_;
# XXXXX Should we warn them that they're getting an approx?
require "${glb_bpprefix}p-cstab.pl" unless defined %bib'uapprox_tab;
$uapprox_tab{$orig};
}
######
#
# Font change checker. Verifies and tries to correct font changes.
#
# After fonts are converted in your tocanon routine, call this. In your
# fromcanon routine, you can assume this has been called.
#
# XXXXX Should we call this in bp.pl's conversion routines? That would
# guarantee that it has been run. Right now, we let each module
# decide when and if it needs to be run.
#
# It takes a string that has font changes in it and makes sure they always
# match up and that there isn't an odd number (more starts than ends, more
# ends than starts).
#
sub font_check {
local($_) = @_;
# XXXXX Ought to read meta information from 00 or as input.
return $_ unless /${bib'cs_meta}01[01]/;
local(@fontstack) = ();
local($fontsmatch, $font, $pfont);
# Check for this special occurance: They don't have end fonts (or don't
# use them). They just make everything a begin font (troff often does this).
# Solution: Try to fix it up by replacing each begin after the first with
# an endprevious / begin pair. Then remove the last begin.
if (!/${bib'cs_meta}011/) {
local($pos) = $[;
local($lpos) = 0;
local($distance) = length($bib'cs_meta) + 3;
local($n) = 0;
while (($pos = index($_, "${bib'cs_meta}010", $pos)) >= $[) {
$n++;
if ($n == 1) {
$lpos = $pos;
$pfont = substr($_, $pos + $distance, 1);
$pos++;
next;
}
$lpos = $pos;
$font = substr($_, $pos + $distance, 1);
substr($_, $pos, 0) = "${bib'cs_meta}0110";
$pos += ($distance*2); # need to skip over the one we just put in.
}
if ($n > 1) {
# now remove the last begin
substr($_, $lpos + $distance + 1, $distance + 1) = '';
} elsif ($n == 1) {
( run in 0.571 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )