Convert-MRC

 view release on metacpan or  search on metacpan

lib/Convert/MRC.pm  view on Meta::CPAN

#
# This file is part of Convert-MRC
#
# This software is copyright (c) 2013 by Alan K. Melby.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
# MRC to TBX converter
# written June-Nov 2008 by Nathan E. Rasmussen
# Modified 2013 by Nathan G. Glenn

# Example input data follows:

# TEST DATA HERE

package Convert::MRC;
use strict;
use warnings;
use Data::Dumper;
use Carp;
use English qw(-no_match_vars);

use Log::Message::Simple qw (:STD);

#import global constants used in processing
use Convert::MRC::Variables;

# ABSTRACT: CONVERT MRC TO TBX-BASIC
our $VERSION = '4.03'; # VERSION

use open ':encoding(utf8)', ':std';    # incoming/outgoing data will be UTF-8

our @origARGV = @ARGV;
local @ARGV = (q{-}) unless @ARGV;            # if no filenames given, take std input

#use batch() if called as a script
__PACKAGE__->new->batch(@ARGV) unless caller;

#allows us to get some kind of version string during development, when $VERSION is undefined
#($VERSION is inserted by a dzil plugin at build time)
sub _version {
	## no critic (ProhibitNoStrict)
	no strict 'vars';
	return $VERSION || q{??};
}


sub new {
    my ($class) = @_;
    my $self = bless {}, $class;
    $self->_init;
    return $self;
}

sub _init {
    my ($self) = @_;
    $self->input_fh( \*STDIN );
    $self->tbx_fh( \*STDOUT );
    $self->log_fh( \*STDERR );
	return;
}


sub tbx_fh {
	## no critic (RequireBriefOpen)
    my ( $application, $fh ) = @_;
    if ($fh) {
        if ( ref($fh) eq 'GLOB' ) {
            $application->{tbx_fh} = $fh;
        }
        else {
            open my $fh2, '>', $fh or die "Couldn't open $fh";
            $application->{tbx_fh} = $fh2;
        }
    }
    return $application->{tbx_fh};
}


sub log_fh {
	## no critic (RequireBriefOpen)
    my ( $application, $fh ) = @_;
    if ($fh) {
        if ( ref($fh) eq 'GLOB' ) {
            $application->{log_fh} = $fh;
        }
        else {
            open my $fh2, '>', $fh or die "Couldn't open $fh";
            $application->{log_fh} = $fh2;
        }
    }
    return $application->{log_fh};
}

#same thing as Log::Message::Simple::error, but verbose is always off.
sub _error {
    my ($msg) = @_;
    error $msg, 0;
	return;
}

#prints the given message to the current log file handle.
sub _log {
    my ( $self, $message ) = @_;
    print { $self->{log_fh} } $message;
	return;
}


sub input_fh {
	## no critic (RequireBriefOpen)
    my ( $application, $fh ) = @_;
    if ($fh) {
        if ( ref($fh) eq 'GLOB' ) {
            $application->{input_fh} = $fh;
        }
		#emulate diamond operator
		elsif ($fh eq q{-}){
			$application->{input_fh} = \*STDIN;
		}
        else {
            open my $fh2, '<', $fh or die "Couldn't open $fh";
            $application->{input_fh} = $fh2;
        }
    }
    return $application->{input_fh};
}


sub batch {
    my ( $self, @mrc_files ) = @_;
    ## no critic (ProhibitOneArgSelect)
    for my $mrc (@mrc_files) {

        # find an appropriate name for output and warning files
        my $suffix = _get_suffix($mrc);

        #set output, error and input files
        my $outTBX  = "$mrc$suffix.tbx";
        my $outWarn = "$mrc$suffix.log";

        # print STDERR "See $outTBX and $outWarn for output.\n";
        $self->input_fh($mrc);
        $self->log_fh($outWarn);
        $self->tbx_fh($outTBX);

        #convert the input file, sending output to appropriate file handles
        $self->convert;

        # close these so that they are written.
        close $self->log_fh();
        close $self->tbx_fh();

        # close input too, since it's been exhausted.
        close $self->input_fh();

lib/Convert/MRC.pm  view on Meta::CPAN

        my $err =
"The input MRC is missing a line beginning with =MRCTermTable. You must include such a line to switch on the TBX converter -- all preceding material is ignored.";

        carp $err;
        _error $err;

        $self->_finish_processing($select);
        return;
    }

    #in case the file was header only
    if ( $segment eq 'header' and not $aborted ) {

        #check and print header
        unless ( $self->_printHeader( \%header ) ) {
            _error
"TBX header could not be completed because a required A-row is missing or malformed.";
            $aborted = 1;
        }

        #alert user to lack of content
        _error('The file contained no concepts or parties.');

        #close the opened, and empty, body element
        print "</body>\n";
    }

    if ($aborted) {
        carp "See log -- processing could not be completed.\n";
        $self->_finish_processing($select);
        return;
    }

    print "</text>\n</martif>\n";
    msg( "File includes links to:\n\t" . ( join "\n\t", @linksMade ) )
      if @linksMade;

    msg "File includes IDs:\n\t" . ( join "\n\t", @idsUsed )
      if @idsUsed;

    # TODO: is this necessary? also look for tbx_fh and input_fh
    # next open would close implicitly but not reset $INPUT_LINE_NUMBER
    $self->_finish_processing($select);
    return;
}

sub _finish_processing {
	## no critic (ProhibitOneArgSelect)
    my ( $self, $select ) = @_;

    #clear all processing data
    delete $self->{concept};
    delete $self->{langSet};
    delete $self->{term};
    delete $self->{party};
    delete $self->{unsortedTerm};
    delete $self->{party};
    delete $self->{langSetDefined};

    #print all messages to the object's log
    $self->_log( Log::Message::Simple->stack_as_string() );
    Log::Message::Simple->flush();

    select $select;

    # user's responsibility to close the various filehandles
	return;
}


# do nothing if no term level is open
sub _closeTerm {
    my ($self) = @_;
    if ( defined $self->{term} ) {

        # print STDOUT Dumper $self->{unsortedTerm} ;
        # print STDOUT Dumper $self;
        my $id = ${ $self->{unsortedTerm} }[0]->{'ID'} ||

          #necessary for error reporting; $ID might be undef
          'C' . $self->{concept} . $self->{langSet} . $self->{term};
        my $tig        = $self->_sortRefs( @{ $self->{unsortedTerm} } );
        my $posContext = pop @$tig;
        unless ( $posContext || $self->{langSetDefined} ) {
            _error
"Term $id (see line @{[$INPUT_LINE_NUMBER - 1]}) is lacking an element necessary for TBX-Basic.\n\tTo make it valid for human use only, add one of:\n\t\ta definition (at the language level)\n\t\tan example of use in context (at the term level).\n\tTo...
        }
        $self->_printRow($tig);
        undef $self->{term};
        undef $self->{unsortedTerm};
    }
	return;
}

# nothing if no lang level is open
sub _closeLangSet {
    my ($self) = @_;
    if ( defined $self->{langSet} ) {
        print "</langSet>\n";
        undef $self->{langSet};
        undef $self->{langSetDefined};
    }
	return;
}

# nothing if no concept level is open
sub _closeConcept {
    my ($self) = @_;
    if ( defined $self->{concept} ) {
        print "</termEntry>\n";
        undef $self->{concept};
    }
	return;
}


my $NUM_MONTHS = 12;
sub _parseRow {
    my ( $self, $row_text ) = @_;
    $row_text =~ s/\s*$//; # super-chomp: cut off any trailing whitespace at all
         # later, split will eliminate between-field whitespace
         # and the keyword and langtag parsers will eliminate other space



( run in 2.600 seconds using v1.01-cache-2.11-cpan-5837b0d9d2c )