Alvis-Convert
view release on metacpan or search on metacpan
bin/alvisXSL view on Meta::CPAN
#!/usr/bin/perl
# Reads a list of files, either all gzipped, all bzip2ed or text.
# Runs XSL translation on them using the input XSL script,
# and outputs the result to a file.
#
# Runs xsltproc, feeding it its input through stdin.
#
#
use strict;
use POSIX;
use Encode;
use IO::Handle;
use Alvis::Utils;
###################### CONFIGURATION #####################
# Stylesheet path; the usage text shows it as the "xsl" default.
my $XSL = 'xsl/alvisLinks.xsl';
# Argument string; the usage text shows it as the "xslargs" default.
my $XSLARGS = q{-param CUTOFF 8.5 -stringparam SCORETYPE 'standard'};
# Upper bound for one group sent to a single xsltproc instance.
# NOTE(review): the original note gives the unit as "records per group";
# confirm against the code that consumes this value.
my $MAXSIZE = 10_000_000;
# Element names, presumably of a single record and of the enclosing
# collection -- confirm against the code that uses them.
my $RECORDELEMENT = 'documentRecord';
my $GROUPELEMENT  = 'documentCollection';
# Per the original note: whatever else was included is tossed out and
# this text is added instead.
my $GROUPELEMENTEXTRA = q{ xmlns="http://alvis.info/enriched/" version="1.1"};
############ END CONFIGURATION ######################
# Make STDERR unbuffered so messages written to it appear immediately.
STDERR->autoflush(1);
# Encoding setup; it follows the module "use" lines above.
# The "encoding" pragma has been deprecated since Perl 5.18 and is no
# longer supported from Perl 5.26 on, so its documented effects for
# 'utf8' are spelled out instead: UTF-8 source text, and UTF-8 layers on
# STDIN and STDOUT.
# NOTE(review): the old pragma also decoded byte strings implicitly at
# run time; that has no modern equivalent -- confirm that no later code
# relied on it.
use utf8;
binmode(STDIN,  ':raw :encoding(utf8)');
binmode(STDOUT, ':raw :encoding(utf8)');
# Default to the :utf8 layer for handles opened in the rest of the file.
use open ':utf8';
# Help text for the command line. The last two lines show the configured
# defaults held in $XSL and $XSLARGS.
# (Fixed a duplicated "into" that was split across two lines of the text.)
my $USAGE = "alvisXSL [--gzip|--bzip2|--dir] [--xslargs ARGS] [--xsl XSL-FILE] XML-FILE+\n"
    . " Runs xsltproc multiple times on inputs. To convert into\n"
    . " XML, use alvisDecollect as a post-processor.\n"
    . " dir = descend into directories, but not recursively\n"
    . " xsl = $XSL\n"
    . " xslargs = $XSLARGS\n";
# command line inputs
# Three flags, all off initially; the names match the --gzip, --bzip2 and
# --dir options listed in the usage text. The code that sets them is not
# in view here. $usedir is consulted by nextfile() when an argument turns
# out to be a directory.
my ( $usegzip, $usebzip2, $usedir ) = ( 0, 0, 0 );
#################################################################
#
# file feeder
#
#################################################################
# Paths still waiting to be handed out by nextfile().
my @files;
# Queue that nextfile() drains while $usingdir is set.
my @dirfiles;
# $usingdir is true while nextfile() serves entries from @dirfiles;
# nextfile() resets it to 0, and $withdir to "", once they run out.
# NOTE(review): the code that fills @dirfiles and sets $withdir is not
# visible here.
my ( $usingdir, $withdir ) = ( 0, "" );

# morefiles()
#   Returns 1 while the feeder may still have something to deliver:
#   either @files is non-empty or a directory is being worked through.
#   Returns 0 otherwise.
#   A result of 1 is not a guarantee: with $usingdir set and nothing
#   left to serve, nextfile() can still come back with no file.
sub morefiles () {
    return 1 if @files;
    return 1 if $usingdir;
    return 0;
}
sub nextfile () {
my $nf;
if ( $usingdir ) {
#print STDERR "Using dir\n";
while ( ($nf=shift(@dirfiles)) ) {
if ( -f $nf ) {
return $nf;
}
}
$usingdir = 0;
$withdir = "";
return &nextfile();
}
$nf = shift(@files);
#print STDERR "Got $nf\n";
if ( !$nf ) {
return $nf;
}
if ( -d $nf ) {
#print STDERR "Is dir\n";
if ( $usedir ) {
( run in 1.159 second using v1.01-cache-2.11-cpan-39bf76dae61 )