PMLTQ-PML2BASE
view release on metacpan or search on metacpan
lib/PMLTQ/PML2BASE.pm view on Meta::CPAN
package PMLTQ::PML2BASE;
our $AUTHORITY = 'cpan:MATY';
$PMLTQ::PML2BASE::VERSION = '3.0.1';
# ABSTRACT: Convert from PML to SQL
use 5.006;
use strict;
use warnings;
$|=1;
use Data::Dumper;
use open qw(:std :utf8);
use Carp;
# $SIG{__WARN__} = sub { Carp::cluck(@_); };
# NOTE(review): presumably an upper bound on the length of generated
# table/column names -- its use is not visible in this part of the file; confirm.
use constant MAX_NAME_LENGTH => 16;
# The following four constants have the values 1, 2, 4 and 8.
# NOTE(review): presumably meant to be OR-ed together as bit flags -- confirm
# against the code that reads them.
use constant HYBRID=>1; # create both separate node table and node table with attributes
use constant NO_TREE_TABLE=>2; # don't create one common node table for all node types
use constant MAX_MIN_ORD=>4; # node tables have #max_ord and #min_ord columns
use constant TOP_TREE_FLAG=>8; # The <root>__#files table has a 'top' column indicating that
# a given root node belongs to the top-level tree list
# of a file (i.e. it is not a nested #NODE within
# some non-#NODE and non-#TREES data structure)
#
# Generic SQL DB scheme for PML data:
#
# - every structure/container has a unique idx (Number) and carries attributes as columns
#
# - container has a '#content' column
#
# - cdata/constant/choice attributes are stored in the respective columns
#
# - structure/container members are in a separate table where they have a unique idx,
# referred to by the member column
#
# - unordered-list/alt members are stored in a separate table, whose columns
# are a 1:N idx referred to by the member column, and a LM/AM column
# containing the value (following the rules described here);
#
# - sequence members are in a separate table whose columns correspond
# to the elements, each containing a reference to a table
# representing all occurrences of that element; this table has
# a 1:N idx, '#pos' containing position of the element in the sequence,
# '#elem-pos' containing the number of preceding elements of the same name in the sequence+1,
# and the value, as usual. To retrieve a complete content of a certain sequence as a table,
# one has to use a UNION on all the element tables, ordering by #pos and possibly outputting
# a constant '#name' column
#
# - the table names should be derived from PML type names in a canonical way,
# one per PML schema type decl
#
# Possible modifications:
#
# a) cast all cdata structure members also into separate tables and thus
# keep varchar data in separate tables
#
# b) cast node attributes to a separate table, separating the tree structure
# from node data, making the tree-structure table very thin
# in fact: this is necessary, since nodes can be of different types
#
# c) use cdata format information to determine the table column format
#
# some hacks:
#
# - updating refs strips the .rf suffix if the member is a PMLREF, but
# not if it is a list of PMLREFs
#
# - prefix of PMLREFs is stripped down; should be kept and
# used to verify the target based on filename on UPDATE
#
#
use Treex::PML::Schema;
use Treex::PML::Instance;
use PMLTQ::Common;
use PMLTQ::Relation;
use List::Util qw(max first);
use Cwd;
use Carp;
# Runs once, when this file is loaded.
# NOTE(review): what load() registers is defined in PMLTQ::Relation -- confirm there.
PMLTQ::Relation->load();
# File-scoped state shared by the subs of this package.
# init() assigns $idx, $node_idx, $last_type_no, $index_id, $tree_no, $filename,
# $relations, @dump_fns and %pmlref_target_info; it does not assign $node_table
# or $last_tree_no. destroy() undefs (at least) $file_table, $root_name, $schema,
# $references_table, %schema, %fh, %orig_name and %seen_ref_schema.
my ($file_table,$root_name,$schema,$references_table,%schema,%fh,%orig_name,%seen_ref_schema,
$node_table,$index_id,$last_type_no,$tree_no,$filename,
$idx,$node_idx, $last_tree_no, %pmlref_target_info, $relations, @dump_fns
);
# Package variable (visible as %PMLTQ::PML2BASE::opts) holding the conversion
# options; init() reads 'init-idx' and 'init-node-idx' and fills in defaults for
# 'related-schema', other_schemas, hybrid, prefix and 'data-dir'.
our %opts;
# Reset the file-scoped conversion state and fill in defaults in %opts.
#
# Reads:  $opts{'init-idx'}, $opts{'init-node-idx'}, $opts{'related-schema'}
# Writes: $idx, $node_idx, $last_type_no, $index_id, $tree_no, $filename,
#         $relations, @dump_fns, %pmlref_target_info and the option keys
#         'related-schema', other_schemas, hybrid, prefix, 'data-dir'.
#
# Every default is applied whenever the current option value is false
# (undef, 0, '' ...), not only when it is missing.
sub init {
  # Starting index: a false or missing 'init-idx' means 0.
  my $first_idx = $opts{'init-idx'} || 0;
  $idx      = $first_idx;
  # Node index falls back to the generic starting index.
  $node_idx = $opts{'init-node-idx'} || $first_idx;

  # Fresh counters and per-run containers.
  ($last_type_no, $index_id, $tree_no) = (0, 0, 0);
  undef $filename;
  $relations          = [];
  @dump_fns           = ();
  %pmlref_target_info = ();

  $opts{'related-schema'} = [] unless $opts{'related-schema'};

  # Build one Treex::PML::Schema object per related schema file, unless the
  # caller already supplied other_schemas.
  unless ($opts{other_schemas}) {
    my @related;
    for my $schema_file (@{ $opts{'related-schema'} }) {
      push @related,
        Treex::PML::Schema->new({filename=>$schema_file,use_resources=>1});
    }
    $opts{other_schemas} = \@related;
  }

  # Note: any false value of hybrid (including an explicit 0) becomes 1.
  $opts{hybrid} = 1  unless $opts{hybrid};
  $opts{prefix} = '' unless $opts{prefix};
  # Kept as the final statement so the sub's return value is unchanged.
  $opts{'data-dir'} ||= '';
}
sub destroy {
undef $file_table;
undef $root_name;
undef $schema;
undef $references_table;
undef %schema;
undef %fh;
undef %orig_name;
undef %seen_ref_schema;
( run in 0.883 second using v1.01-cache-2.11-cpan-df04353d9ac )