Treex-PML
view release on metacpan or search on metacpan
lib/Treex/PML/Schema.pm view on Meta::CPAN
=item C<base_url>
base URL for referred schemas (usefull when parsing from a file-handle or a string)
=item C<use_resources>
if this option is used with a true value, the parser will attempt to
locate referred schemas also in L<Treex::PML> resource paths.
=item C<revision>, C<minimal_revision>, C<maximal_revision>
constraints to the revision number of the schema.
=item C<validate>
if this option is used with a true value, the parser will validate the
schema on the fly using a RelaxNG grammar given using the
C<relaxng_schema> parameter; if C<relaxng_schema> is not given, the
file 'pml_schema_inline.rng' searched for in L<Treex::PML> resource paths
is assumed.
=item C<relaxng_schema>
a particular RelaxNG grammar to validate against. The value may be an
URL or filename for the grammar in the RelaxNG XML format, or a
XML::LibXML::RelaxNG object representation. The compact format is not
supported.
=back
=cut
BEGIN{
my %parse_opts = (
KeyAttr => {
"member" => "name",
"attribute" => "name",
"element" => "name",
"type" => "name",
"template" => "name",
"derive" => "name",
"let" => "param",
"param" => "name",
},
TextOnly => {
description => 'content',
revision => 'content',
value => 'content',
delete => 'content',
constant => 'value',
},
Stringify => {
description => 'content',
revision => 'content',
value => 'content',
delete => 'content',
},
Solitary => {
map { $_ => 1 }
qw(description revision root cdata structure container sequence constant list alt choice)
},
Bless => {
member => 'Treex::PML::Schema::Member',
attribute => 'Treex::PML::Schema::Attribute',
element => 'Treex::PML::Schema::Element',
type => 'Treex::PML::Schema::Type',
root => 'Treex::PML::Schema::Root',
structure => 'Treex::PML::Schema::Struct',
container => 'Treex::PML::Schema::Container',
sequence => 'Treex::PML::Schema::Seq',
list => 'Treex::PML::Schema::List',
alt => 'Treex::PML::Schema::Alt',
cdata => 'Treex::PML::Schema::CDATA',
constant => 'Treex::PML::Schema::Constant',
choice => 'Treex::PML::Schema::Choice',
template => 'Treex::PML::Schema::Template',
copy => 'Treex::PML::Schema::Copy',
import => 'Treex::PML::Schema::Import',
derive => 'Treex::PML::Schema::Derive',
'*' => 'Treex::PML::Schema::XMLNode',
},
DefaultNs => PML_SCHEMA_NS,
);
sub new {
my ($class,$opts, $more_opts)=@_;
if (!ref $opts) {
# compatibility with older API
$more_opts ||= {};
$opts = { %$more_opts, string => $opts };
}
my $file = $opts->{filename};
my $base = $opts->{base_url};
if (defined $base and length $base) {
$file = Treex::PML::ResolvePath($base,$file,$opts->{use_resources});
} elsif ($opts->{use_resources}) {
$file = Treex::PML::FindInResources($file);
}
my $schema;
my $revision_opts = {
map { $_ => delete($opts->{$_}) }
qw(revision_error revision minimal_revision maximal_revision)
};
if (defined($file) and ref($schema = $opts->{schemas}{$file})) {
print STDERR "schema $file already hashed\n" if $Treex::PML::Debug;
$schema->check_revision($revision_opts);
return $schema;
}
my $parse_opts = {%parse_opts,%$opts};
$parse_opts->{Bless}{pml_schema}=$class;
$parse_opts->{URL} = (ref $file && $file->isa('Treex::PML::Resource::URI')) ? $file->file : $file;
my $pml_reader = Treex::PML::Schema::Reader->new($parse_opts);
my $reader = $pml_reader->reader;
my $version;
eval {
unless ( $reader->nextElement('pml_schema', PML_SCHEMA_NS)==1 ) {
die "Not a PML schema: $file!\n";
}
$version = $reader->getAttribute('version');
$reader->moveToElement;
$schema = $pml_reader->parse_element();
};
if ($@) {
die "Treex::PML::Schema::Reader error while parsing: $file near line ".$reader->lineNumber."\n$@\n";
return;
}
if (defined $version and length $version) {
unless (cmp_revisions($version,PML_VERSION_SUPPORTED)<=0) {
die "Unsupported version of PML schema '$file': this module supports versions up to ".PML_VERSION_SUPPORTED."\n";
}
lib/Treex/PML/Schema.pm view on Meta::CPAN
}
sub serialize_get_children {
my ($self,$opts)=@_;
my @children = $self->SUPER::serialize_get_children($opts);
return (
(grep { defined($_->[1]) and length($_->[1]) } (
['revision',$self->{revision}],
['description',$self->{description}]
)
),
(grep { $_->[0] eq 'reference' } @children),
(grep { $_->[0] eq 'root' } @children),
(grep { $_->[0] !~ /^(?:root|reference)$/ } @children)
);
}
=item $schema->post_process($options)
Auxiliary method used internally by the PML Schema parser. It
simplifies the schema and for each declaration object creates back
references to its parent declaration and schema and pre-computes the
type attribute path returned by $decl->get_decl_path().
=cut
sub post_process {
my ($schema,$opts)=@_;
$schema->simplify($opts);
$schema->for_each_decl(sub{
my ($decl)=@_;
weaken( $decl->{-schema} = $schema );
my $parent = $decl->{-parent};
my $decl_is = $decl->get_decl_type;
if (
$decl_is == PML_STRUCTURE_DECL ||
$decl_is == PML_CONTAINER_DECL ||
$decl_is == PML_SEQUENCE_DECL ||
$decl_is == PML_LIST_DECL ||
$decl_is == PML_ALT_DECL ||
$decl_is == PML_CHOICE_DECL ||
$decl_is == PML_CONSTANT_DECL ||
$decl_is == PML_CDATA_DECL
) {
my $parent_is = $parent->get_decl_type;
if ($parent_is == PML_TYPE_DECL) {
$decl->{-path} = '!'.$parent->get_name;
} elsif ($parent_is == PML_ROOT_DECL) {
$decl->{-path} = '';
} elsif ($parent_is == PML_ATTRIBUTE_DECL ||
$parent_is == PML_MEMBER_DECL ||
$parent_is == PML_ELEMENT_DECL) {
$decl->{-path} = $parent->{-parent}{-path}.'/'.$parent->get_name;
} elsif ($parent_is == PML_CONTAINER_DECL and $decl_is != PML_ATTRIBUTE_DECL) {
$decl->{-path} = $parent->{-path}.'/#content';
} elsif ($parent_is == PML_LIST_DECL) {
$decl->{-path} = $parent->{-path}.'/LM';
} elsif ($parent_is == PML_ALT_DECL) {
$decl->{-path} = $parent->{-path}.'/AM';
}
if ($decl_is == PML_LIST_DECL and !$decl->{-decl} and $decl->{role} eq '#KNIT') {
# warn ("List $decl->{-path} with role=\"#KNIT\" must have a content type declaration: assuming <cdata format=\"PMLREF\">!\n");
__fix_knit_type($schema,$decl,$decl->{-path}.'/LM');
}
} elsif ($decl_is == PML_MEMBER_DECL) {
if (!$decl->{-decl} and $decl->{role} eq '#KNIT') {
# warn ("Member $decl->{-parent}{-path}/$decl->{-name} with role=\"#KNIT\" must have a content type declaration: assuming <cdata format=\"PMLREF\">!\n");
__fix_knit_type($schema,$decl);
}
}
});
}
sub __fix_knit_type {
my ($schema,$decl,$path)=@_;
$decl->{-decl}='cdata';
my $cdata = $decl->{cdata}= bless {
format => 'PMLREF',
-xml_name => 'cdata',
-attributes => [ 'format' ],
}, 'Treex::PML::Schema::CDATA';
weaken( $cdata->{-schema} = $schema );
weaken( $cdata->{-parent} = $decl );
if (defined $path) {
$cdata->{-path} = $path;
} elsif ($decl->{-parent} and $decl->{-name}) {
$cdata->{-path} = "$decl->{-parent}{-path}/$decl->{-name}";
}
}
sub _traverse_data {
my ($data,$sub,$seen,$hashes_only)=@_;
$seen->{$data}=1;
if (UNIVERSAL::isa($data,'ARRAY')) {
$sub->($data,0) unless $hashes_only;
foreach my $val (@$data) {
if (ref($val) and !exists $seen->{$val}) {
_traverse_data($val,$sub,$seen,$hashes_only);
}
}
} elsif (UNIVERSAL::isa($data,'HASH')) {
$sub->($data,1);
foreach my $val (values %$data) {
if (ref($val) and !exists $seen->{$val}) {
_traverse_data($val,$sub,$seen,$hashes_only);
}
}
}
}
=back
=head1 CLASSES FOR TYPE DECLARATIONS
=over 3
=item L<Treex::PML::Schema::Decl>
=item L<Treex::PML::Schema::Root>
=item L<Treex::PML::Schema::Type>
=item L<Treex::PML::Schema::Struct>
=item L<Treex::PML::Schema::Container>
=item L<Treex::PML::Schema::Seq>
=item L<Treex::PML::Schema::List>
=item L<Treex::PML::Schema::Alt>
=item L<Treex::PML::Schema::Choice>
=item L<Treex::PML::Schema::CDATA>
=item L<Treex::PML::Schema::Constant>
=item L<Treex::PML::Schema::Member>
=item L<Treex::PML::Schema::Element>
=item L<Treex::PML::Schema::Attribute>
=back
( run in 0.663 second using v1.01-cache-2.11-cpan-524268b4103 )