Convert-TBX-Basic

 view release on metacpan or  search on metacpan

lib/Convert/TBX/Basic.pm  view on Meta::CPAN

#
# This file is part of Convert-TBX-Basic
#
# This software is copyright (c) 2016 by Alan K. Melby.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
package Convert::TBX::Basic;
use strict;
use warnings;
# ABSTRACT: Convert TBX-Basic data into TBX-Min
our $VERSION = '0.03'; # VERSION
use XML::Twig;
use autodie;
use Path::Tiny;
use Carp;
use Log::Any '$log';
use TBX::Min 0.07;
use Try::Tiny;
use Exporter::Easy (
    OK => ['basic2min']
);
use open ':encoding(utf-8)', ':std'; #this ensures output file is UTF-8

# Maps TBX-Basic administrative status picklist values (the text of a
# <termNote type="administrativeStatus"> element) to the status names
# used on the TBX-Min side of the conversion.
my %status_map = (
    'preferredTerm-admn-sts'  => 'preferred',
    'admittedTerm-admn-sts'   => 'admitted',
    'deprecatedTerm-admn-sts' => 'notRecommended',

    # Every picklist value ends in "-admn-sts". This key used to be
    # spelled "-admn-st", so a correctly spelled superseded status
    # had no entry in the map.
    'supersededTerm-admn-sts' => 'obsolete',
);

sub basic2min {
    @_ == 3 or croak 'Usage: basic2min(data, source-language, target-language)';
    my ($data, $source, $target) = @_;

    my $fh = _get_handle($data);

    # build a twig out of the input document
    my $twig = XML::Twig->new(
        output_encoding => 'UTF-8',
        do_not_chain_handlers => 1,
        keep_spaces     => 0,

        # these store new entries, langGroups and termGroups
        start_tag_handlers => {
            termEntry => \&_entry_start,
            langSet => \&_langStart,
            tig => \&_termGrpStart,
        },

        TwigHandlers    => {
        	# header attributes
            title => \&_title,
            sourceDesc => \&_source_desc,
            'titleStmt/note' => \&_title_note,

            # decide whether to add a new entry
            termEntry => \&_entry,

            # becomes part of the current TBX::Min::ConceptEntry object
            'termEntry/descrip[@type="subjectField"]' => sub {
                shift->{tbx_min_min_current_entry}->
                    subject_field($_->text)},

            # these become attributes of the current
            # TBX::Min::TIG object
            'tig/termNote[@type="administrativeStatus"]' => \&_status,
            term => sub {shift->{tbx_min_current_term_grp}->
                term($_->text)},
            'tig/termNote[@type="partOfSpeech"]' => sub {
                shift->{tbx_min_current_term_grp}->
                part_of_speech($_->text)},
            'tig/note' => \&_as_note,
            'tig/admin[@type="customerSubset"]' => sub {
                shift->{tbx_min_current_term_grp}->customer($_->text)},

            # the information which cannot be converted faithfully
            # gets added as a note to the current TBX::Min::TIG,
            # with its data category prepended



( run in 0.663 second using v1.01-cache-2.11-cpan-39bf76dae61 )