Convert-MRC
view release on metacpan or search on metacpan
lib/Convert/MRC.pm view on Meta::CPAN
#
# This file is part of Convert-MRC
#
# This software is copyright (c) 2013 by Alan K. Melby.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
# MRC to TBX converter
# written June-Nov 2008 by Nathan E. Rasmussen
# Modified 2013 by Nathan G. Glenn
# Example input data follows:
# TEST DATA HERE
package Convert::MRC;
use strict;
use warnings;
use Data::Dumper;
use Carp;
use English qw(-no_match_vars);
use Log::Message::Simple qw (:STD);
#import global constants used in processing
use Convert::MRC::Variables;
# ABSTRACT: CONVERT MRC TO TBX-BASIC
our $VERSION = '4.03'; # VERSION
use open ':encoding(utf8)', ':std'; # incoming/outgoing data will be UTF-8
our @origARGV = @ARGV;
local @ARGV = (q{-}) unless @ARGV; # if no filenames given, take std input
#use batch() if called as a script
__PACKAGE__->new->batch(@ARGV) unless caller;
#allows us to get some kind of version string during development, when $VERSION is undefined
#($VERSION is inserted by a dzil plugin at build time)
sub _version {
## no critic (ProhibitNoStrict)
no strict 'vars';
return $VERSION || q{??};
}
sub new {
my ($class) = @_;
my $self = bless {}, $class;
$self->_init;
return $self;
}
sub _init {
my ($self) = @_;
$self->input_fh( \*STDIN );
$self->tbx_fh( \*STDOUT );
$self->log_fh( \*STDERR );
return;
}
sub tbx_fh {
## no critic (RequireBriefOpen)
my ( $application, $fh ) = @_;
if ($fh) {
if ( ref($fh) eq 'GLOB' ) {
$application->{tbx_fh} = $fh;
}
else {
open my $fh2, '>', $fh or die "Couldn't open $fh";
$application->{tbx_fh} = $fh2;
}
}
return $application->{tbx_fh};
}
sub log_fh {
## no critic (RequireBriefOpen)
my ( $application, $fh ) = @_;
if ($fh) {
if ( ref($fh) eq 'GLOB' ) {
$application->{log_fh} = $fh;
}
else {
open my $fh2, '>', $fh or die "Couldn't open $fh";
$application->{log_fh} = $fh2;
}
}
return $application->{log_fh};
}
#same thing as Log::Message::Simple::error, but verbose is always off.
sub _error {
my ($msg) = @_;
error $msg, 0;
return;
}
#prints the given message to the current log file handle.
sub _log {
my ( $self, $message ) = @_;
print { $self->{log_fh} } $message;
return;
}
sub input_fh {
## no critic (RequireBriefOpen)
my ( $application, $fh ) = @_;
if ($fh) {
if ( ref($fh) eq 'GLOB' ) {
$application->{input_fh} = $fh;
}
#emulate diamond operator
elsif ($fh eq q{-}){
$application->{input_fh} = \*STDIN;
}
else {
open my $fh2, '<', $fh or die "Couldn't open $fh";
$application->{input_fh} = $fh2;
}
}
return $application->{input_fh};
}
sub batch {
my ( $self, @mrc_files ) = @_;
## no critic (ProhibitOneArgSelect)
for my $mrc (@mrc_files) {
# find an appropriate name for output and warning files
my $suffix = _get_suffix($mrc);
#set output, error and input files
my $outTBX = "$mrc$suffix.tbx";
my $outWarn = "$mrc$suffix.log";
# print STDERR "See $outTBX and $outWarn for output.\n";
$self->input_fh($mrc);
$self->log_fh($outWarn);
$self->tbx_fh($outTBX);
#convert the input file, sending output to appropriate file handles
$self->convert;
# close these so that they are written.
close $self->log_fh();
close $self->tbx_fh();
# close input too, since it's been exhausted.
close $self->input_fh();
lib/Convert/MRC.pm view on Meta::CPAN
my $err =
"The input MRC is missing a line beginning with =MRCTermTable. You must include such a line to switch on the TBX converter -- all preceding material is ignored.";
carp $err;
_error $err;
$self->_finish_processing($select);
return;
}
#in case the file was header only
if ( $segment eq 'header' and not $aborted ) {
#check and print header
unless ( $self->_printHeader( \%header ) ) {
_error
"TBX header could not be completed because a required A-row is missing or malformed.";
$aborted = 1;
}
#alert user to lack of content
_error('The file contained no concepts or parties.');
#close the opened, and empty, body element
print "</body>\n";
}
if ($aborted) {
carp "See log -- processing could not be completed.\n";
$self->_finish_processing($select);
return;
}
print "</text>\n</martif>\n";
msg( "File includes links to:\n\t" . ( join "\n\t", @linksMade ) )
if @linksMade;
msg "File includes IDs:\n\t" . ( join "\n\t", @idsUsed )
if @idsUsed;
# TODO: is this necessary? also look for tbx_fh and input_fh
# next open would close implicitly but not reset $INPUT_LINE_NUMBER
$self->_finish_processing($select);
return;
}
sub _finish_processing {
## no critic (ProhibitOneArgSelect)
my ( $self, $select ) = @_;
#clear all processing data
delete $self->{concept};
delete $self->{langSet};
delete $self->{term};
delete $self->{party};
delete $self->{unsortedTerm};
delete $self->{party};
delete $self->{langSetDefined};
#print all messages to the object's log
$self->_log( Log::Message::Simple->stack_as_string() );
Log::Message::Simple->flush();
select $select;
# user's responsibility to close the various filehandles
return;
}
# do nothing if no term level is open
sub _closeTerm {
my ($self) = @_;
if ( defined $self->{term} ) {
# print STDOUT Dumper $self->{unsortedTerm} ;
# print STDOUT Dumper $self;
my $id = ${ $self->{unsortedTerm} }[0]->{'ID'} ||
#necessary for error reporting; $ID might be undef
'C' . $self->{concept} . $self->{langSet} . $self->{term};
my $tig = $self->_sortRefs( @{ $self->{unsortedTerm} } );
my $posContext = pop @$tig;
unless ( $posContext || $self->{langSetDefined} ) {
_error
"Term $id (see line @{[$INPUT_LINE_NUMBER - 1]}) is lacking an element necessary for TBX-Basic.\n\tTo make it valid for human use only, add one of:\n\t\ta definition (at the language level)\n\t\tan example of use in context (at the term level).\n\tTo...
}
$self->_printRow($tig);
undef $self->{term};
undef $self->{unsortedTerm};
}
return;
}
# nothing if no lang level is open
sub _closeLangSet {
my ($self) = @_;
if ( defined $self->{langSet} ) {
print "</langSet>\n";
undef $self->{langSet};
undef $self->{langSetDefined};
}
return;
}
# nothing if no concept level is open
sub _closeConcept {
my ($self) = @_;
if ( defined $self->{concept} ) {
print "</termEntry>\n";
undef $self->{concept};
}
return;
}
my $NUM_MONTHS = 12;
sub _parseRow {
my ( $self, $row_text ) = @_;
$row_text =~ s/\s*$//; # super-chomp: cut off any trailing whitespace at all
# later, split will eliminate between-field whitespace
# and the keyword and langtag parsers will eliminate other space
( run in 2.600 seconds using v1.01-cache-2.11-cpan-5837b0d9d2c )