Convert-TBX-Basic
view release on metacpan or search on metacpan
lib/Convert/TBX/Basic.pm view on Meta::CPAN
#
# This file is part of Convert-TBX-Basic
#
# This software is copyright (c) 2016 by Alan K. Melby.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
package Convert::TBX::Basic;
use strict;
use warnings;
# ABSTRACT: Convert TBX-Basic data into TBX-Min
our $VERSION = '0.03'; # VERSION
use XML::Twig;
use autodie;
use Path::Tiny;
use Carp;
use Log::Any '$log';
use TBX::Min 0.07;
use Try::Tiny;
use Exporter::Easy (
OK => ['basic2min']
);
use open ':encoding(utf-8)', ':std'; #this ensures output file is UTF-8
# Lookup table from TBX-Basic administrative-status picklist values (the
# keys) to the corresponding short status names (the values).
# NOTE(review): presumably consulted by the handler registered for
# 'tig/termNote[@type="administrativeStatus"]' -- confirm against _status.
my %status_map = (
    'preferredTerm-admn-sts'  => 'preferred',
    'admittedTerm-admn-sts'   => 'admitted',
    'deprecatedTerm-admn-sts' => 'notRecommended',
    'supersededTerm-admn-sts' => 'obsolete',

    # Legacy alias: earlier versions of this table spelled the key without
    # the trailing "s", so the correctly spelled picklist value was never
    # matched. The misspelling is kept so that any input which happened to
    # rely on it continues to map to the same status.
    'supersededTerm-admn-st'  => 'obsolete',
);
sub basic2min {
@_ == 3 or croak 'Usage: basic2min(data, source-language, target-language)';
my ($data, $source, $target) = @_;
my $fh = _get_handle($data);
# build a twig out of the input document
my $twig = XML::Twig->new(
output_encoding => 'UTF-8',
do_not_chain_handlers => 1,
keep_spaces => 0,
# these store new entries, langGroups and termGroups
start_tag_handlers => {
termEntry => \&_entry_start,
langSet => \&_langStart,
tig => \&_termGrpStart,
},
TwigHandlers => {
# header attributes
title => \&_title,
sourceDesc => \&_source_desc,
'titleStmt/note' => \&_title_note,
# decide whether to add a new entry
termEntry => \&_entry,
# becomes part of the current TBX::Min::ConceptEntry object
'termEntry/descrip[@type="subjectField"]' => sub {
shift->{tbx_min_min_current_entry}->
subject_field($_->text)},
# these become attributes of the current
# TBX::Min::TIG object
'tig/termNote[@type="administrativeStatus"]' => \&_status,
term => sub {shift->{tbx_min_current_term_grp}->
term($_->text)},
'tig/termNote[@type="partOfSpeech"]' => sub {
shift->{tbx_min_current_term_grp}->
part_of_speech($_->text)},
'tig/note' => \&_as_note,
'tig/admin[@type="customerSubset"]' => sub {
shift->{tbx_min_current_term_grp}->customer($_->text)},
# the information which cannot be converted faithfully
# gets added as a note to the current TBX::Min::TIG,
# with its data category prepended
( run in 0.663 second using v1.01-cache-2.11-cpan-39bf76dae61 )