Alvis-Convert

 view release on metacpan or  search on metacpan

bin/alvisXSL  view on Meta::CPAN

#!/usr/bin/perl

# Reads a list of files, either all gzipped, all bzip2ed or text.
# Runs XSL translation on them using the input XSL script,
# and outputs the result to a file.
#
# Uses xsltproc run using stdin.
# 
#
use strict;
use POSIX;
use Encode;
use IO::Handle;

use Alvis::Utils;

###################### CONFIGURATION #####################

#  Default stylesheet and default argument string; both values are
#  interpolated into the usage text further down.
my $XSL     = 'xsl/alvisLinks.xsl';
my $XSLARGS = q{-param CUTOFF 8.5 -stringparam SCORETYPE 'standard'};

#  Limit on the group sent to one XSLTPROC instance.  The original note
#  gives the unit as records; the code applying the limit is outside
#  this part of the file -- TODO confirm the unit.
my $MAXSIZE = 10_000_000;

#  Element names -- presumably the per-record element and the element
#  wrapping a group of records; verify against the code that uses them.
my $RECORDELEMENT = 'documentRecord';
my $GROUPELEMENT  = 'documentCollection';
#  Attribute text added to the group element after tossing out whatever
#  else was included there.
my $GROUPELEMENTEXTRA = q{ xmlns="http://alvis.info/enriched/" version="1.1"};


############ END CONFIGURATION ######################

#  autoflush STDERR
STDERR->autoflush(1);

# Encoding pragmas follow any includes like "use".
#
# The "encoding" pragma was deprecated in perl 5.18 and croaks at
# compile time from perl 5.26 on, so its effects are spelled out
# explicitly instead:
#   - "use utf8" declares that this source file is UTF-8;
#   - the binmode calls put a UTF-8 layer on STDIN and STDOUT.
# "use open ':utf8'" is kept as it was: it makes UTF-8 the default
# layer for filehandles opened in this file.
use utf8;
use open ':utf8';
binmode( STDIN,  ':encoding(utf8)' );
binmode( STDOUT, ':encoding(utf8)' );


#  Usage text.  The "xsl" and "xslargs" lines show the current values
#  of $XSL and $XSLARGS.  (Fixed: the message used to read
#  "To convert into into XML" because the word was repeated across the
#  line break.)
my $USAGE = "alvisXSL [--gzip|--bzip2|--dir] [--xslargs ARGS] [--xsl XSL-FILE] XML-FILE+\n"
  . "   Runs xsltproc multiple times on inputs.   To convert\n"
  . "   into XML, use alvisDecollect as a post-processor.\n"
  . "   dir = descend into directories, but not recursively\n"
  . "   xsl = $XSL\n"
  . "   xslargs = $XSLARGS\n";

#  command line inputs; all start off (0).  Presumably set by the
#  --gzip / --bzip2 / --dir options named in the usage text -- the
#  option parsing is not in this part of the file.
my $usegzip = 0;
my $usebzip2 = 0;
#  consulted by nextfile() when an input turns out to be a directory
my $usedir = 0;

#################################################################
#
#  file feeder
#
#################################################################

#  Pending inputs; nextfile() shifts entries off the front and
#  morefiles() reports whether any remain.
my @files;
#  Entries shifted off by nextfile() while $usingdir is set; only
#  those that are plain files are returned.
my @dirfiles;
#  $usingdir is true while @dirfiles is being consumed; nextfile()
#  resets it to 0 and $withdir to "" once @dirfiles is exhausted.
my ( $usingdir, $withdir ) = ( 0, "" );

#  Tells the caller whether the feeder may still have input:
#  returns 1 if @files is non-empty or $usingdir is set, else 0.
sub morefiles () {
  return ( @files > 0 || $usingdir ) ? 1 : 0;
}

sub nextfile () {
  my $nf;
  if ( $usingdir ) {
    #print STDERR "Using dir\n";
    while ( ($nf=shift(@dirfiles))  ) {
      if ( -f $nf ) {
	return $nf;
      }
    }
    $usingdir = 0;
    $withdir = "";
    return &nextfile();
  } 
  $nf = shift(@files);
  #print STDERR "Got $nf\n";
  if ( !$nf ) {
    return $nf;
  }
  if ( -d $nf ) {
    #print STDERR "Is dir\n";
    if ( $usedir ) {



( run in 1.159 second using v1.01-cache-2.11-cpan-39bf76dae61 )