Treex-PML

 view release on metacpan or  search on metacpan

lib/Treex/PML/Schema.pm  view on Meta::CPAN


=item C<base_url>

base URL for referred schemas (usefull when parsing from a file-handle or a string)

=item C<use_resources>

if this option is used with a true value, the parser will attempt to
locate referred schemas also in L<Treex::PML> resource paths.

=item C<revision>, C<minimal_revision>, C<maximal_revision>

constraints to the revision number of the schema.

=item C<validate>

if this option is used with a true value, the parser will validate the
schema on the fly using a RelaxNG grammar given using the
C<relaxng_schema> parameter; if C<relaxng_schema> is not given, the
file 'pml_schema_inline.rng' searched for in L<Treex::PML> resource paths
is assumed.

=item C<relaxng_schema>

a particular RelaxNG grammar to validate against. The value may be an
URL or filename for the grammar in the RelaxNG XML format, or a
XML::LibXML::RelaxNG object representation. The compact format is not
supported.

=back

=cut

BEGIN{
  my %parse_opts = (
  KeyAttr => {
    "member"    => "name",
    "attribute" => "name",
    "element"   => "name",
    "type"      => "name",
    "template"  => "name",
    "derive"    => "name",
    "let"       => "param",
    "param"     => "name",
  },
  TextOnly => {
    description => 'content',
    revision    => 'content',
    value       => 'content',
    delete      => 'content',
    constant    => 'value',
  },
  Stringify => {
    description => 'content',
    revision    => 'content',
    value       => 'content',
    delete      => 'content',
  },
  Solitary => {
    map { $_ => 1 }
      qw(description revision root cdata structure container sequence constant list alt choice)
     },
  Bless => {
    member =>  'Treex::PML::Schema::Member',
    attribute =>  'Treex::PML::Schema::Attribute',
    element =>  'Treex::PML::Schema::Element',
    type =>  'Treex::PML::Schema::Type',
    root =>  'Treex::PML::Schema::Root',
    structure =>  'Treex::PML::Schema::Struct',
    container =>  'Treex::PML::Schema::Container',
    sequence =>  'Treex::PML::Schema::Seq',
    list =>  'Treex::PML::Schema::List',
    alt =>  'Treex::PML::Schema::Alt',
    cdata =>  'Treex::PML::Schema::CDATA',
    constant =>  'Treex::PML::Schema::Constant',
    choice =>  'Treex::PML::Schema::Choice',
    template => 'Treex::PML::Schema::Template',
    copy => 'Treex::PML::Schema::Copy',
    import => 'Treex::PML::Schema::Import',
    derive => 'Treex::PML::Schema::Derive',
    '*' => 'Treex::PML::Schema::XMLNode',
  },
  DefaultNs => PML_SCHEMA_NS,
);

sub new {
  my ($class,$opts, $more_opts)=@_;
  if (!ref $opts) {
    # compatibility with older API
    $more_opts ||= {};
    $opts = { %$more_opts, string => $opts };
  }

  my $file = $opts->{filename};

  my $base = $opts->{base_url};
  if (defined $base and length $base) {
    $file = Treex::PML::ResolvePath($base,$file,$opts->{use_resources});
  } elsif ($opts->{use_resources}) {
    $file = Treex::PML::FindInResources($file);
  }
  my $schema;
  my $revision_opts = {
    map { $_ => delete($opts->{$_}) }
      qw(revision_error revision minimal_revision maximal_revision)
  };
  if (defined($file) and ref($schema = $opts->{schemas}{$file})) {
    print STDERR "schema $file already hashed\n" if $Treex::PML::Debug;
    $schema->check_revision($revision_opts);
    return $schema;
  }
  my $parse_opts = {%parse_opts,%$opts};
  $parse_opts->{Bless}{pml_schema}=$class;
  $parse_opts->{URL} = (ref $file && $file->isa('Treex::PML::Resource::URI')) ? $file->file : $file;

  my $pml_reader = Treex::PML::Schema::Reader->new($parse_opts);
  my $reader = $pml_reader->reader;
  my $version;
  eval {
    unless (  $reader->nextElement('pml_schema', PML_SCHEMA_NS)==1 ) {
      die "Not a PML schema: $file!\n";
    }
    $version = $reader->getAttribute('version');
    $reader->moveToElement;
    $schema = $pml_reader->parse_element();
  };
  if ($@) {
    die "Treex::PML::Schema::Reader error while parsing: $file near line ".$reader->lineNumber."\n$@\n";
    return;
  }
  if (defined $version and length $version) {
    unless (cmp_revisions($version,PML_VERSION_SUPPORTED)<=0) {
      die "Unsupported version of PML schema '$file': this module supports versions up to ".PML_VERSION_SUPPORTED."\n";
    }

lib/Treex/PML/Schema.pm  view on Meta::CPAN

}
sub serialize_get_children {
  my ($self,$opts)=@_;
  my @children = $self->SUPER::serialize_get_children($opts);
  return (
    (grep { defined($_->[1]) and length($_->[1]) } (
      ['revision',$self->{revision}],
      ['description',$self->{description}]
     )
    ),
    (grep { $_->[0] eq 'reference' } @children),
    (grep { $_->[0] eq 'root' } @children),
    (grep { $_->[0] !~ /^(?:root|reference)$/ } @children)
   );
}

=item $schema->post_process($options)

Auxiliary method used internally by the PML Schema parser. It
simplifies the schema and for each declaration object creates back
references to its parent declaration and schema and pre-computes the
type attribute path returned by $decl->get_decl_path().

=cut

sub post_process {
  my ($schema,$opts)=@_;
  $schema->simplify($opts);
  $schema->for_each_decl(sub{
    my ($decl)=@_;
    weaken( $decl->{-schema} = $schema );
    my $parent = $decl->{-parent};
    my $decl_is = $decl->get_decl_type;
    if (
      $decl_is == PML_STRUCTURE_DECL ||
      $decl_is == PML_CONTAINER_DECL ||
      $decl_is == PML_SEQUENCE_DECL ||
      $decl_is == PML_LIST_DECL ||
      $decl_is == PML_ALT_DECL ||
      $decl_is == PML_CHOICE_DECL ||
      $decl_is == PML_CONSTANT_DECL ||
      $decl_is == PML_CDATA_DECL
     ) {
      my $parent_is = $parent->get_decl_type;
      if ($parent_is == PML_TYPE_DECL) {
        $decl->{-path} = '!'.$parent->get_name;
      } elsif ($parent_is == PML_ROOT_DECL) {
        $decl->{-path} = '';
      } elsif ($parent_is == PML_ATTRIBUTE_DECL ||
               $parent_is == PML_MEMBER_DECL    ||
               $parent_is == PML_ELEMENT_DECL) {
        $decl->{-path} = $parent->{-parent}{-path}.'/'.$parent->get_name;
      } elsif ($parent_is == PML_CONTAINER_DECL and $decl_is != PML_ATTRIBUTE_DECL) {
        $decl->{-path} = $parent->{-path}.'/#content';
      } elsif ($parent_is == PML_LIST_DECL) {
        $decl->{-path} = $parent->{-path}.'/LM';
      } elsif ($parent_is == PML_ALT_DECL) {
        $decl->{-path} = $parent->{-path}.'/AM';
      }
      if ($decl_is == PML_LIST_DECL and !$decl->{-decl} and $decl->{role} eq '#KNIT') {
        # warn ("List $decl->{-path} with role=\"#KNIT\" must have a content type declaration: assuming <cdata format=\"PMLREF\">!\n");
        __fix_knit_type($schema,$decl,$decl->{-path}.'/LM');
      }
    } elsif ($decl_is == PML_MEMBER_DECL) {
      if (!$decl->{-decl} and $decl->{role} eq '#KNIT') {
        # warn ("Member  $decl->{-parent}{-path}/$decl->{-name} with role=\"#KNIT\" must have a content type declaration: assuming <cdata format=\"PMLREF\">!\n");
        __fix_knit_type($schema,$decl);
      }
    }
  });
}

sub __fix_knit_type {
  my ($schema,$decl,$path)=@_;
  $decl->{-decl}='cdata';
  my $cdata = $decl->{cdata}= bless {
    format => 'PMLREF',
    -xml_name => 'cdata',
    -attributes => [ 'format' ],
  }, 'Treex::PML::Schema::CDATA';
  weaken( $cdata->{-schema} = $schema );
  weaken( $cdata->{-parent} = $decl );
  if (defined $path) {
    $cdata->{-path} = $path;
  } elsif ($decl->{-parent} and $decl->{-name}) {
    $cdata->{-path} = "$decl->{-parent}{-path}/$decl->{-name}";
  }
}

sub _traverse_data {
  my ($data,$sub,$seen,$hashes_only)=@_;
  $seen->{$data}=1;
  if (UNIVERSAL::isa($data,'ARRAY')) {
    $sub->($data,0) unless $hashes_only;
    foreach my $val (@$data) {
      if (ref($val) and !exists $seen->{$val}) {
        _traverse_data($val,$sub,$seen,$hashes_only);
      }
    }
  } elsif (UNIVERSAL::isa($data,'HASH')) {
    $sub->($data,1);
    foreach my $val (values %$data) {
      if (ref($val) and !exists $seen->{$val}) {
        _traverse_data($val,$sub,$seen,$hashes_only);
      }
    }
  }
}



=back

=head1 CLASSES FOR TYPE DECLARATIONS

=over 3

=item L<Treex::PML::Schema::Decl>

=item L<Treex::PML::Schema::Root>

=item L<Treex::PML::Schema::Type>

=item L<Treex::PML::Schema::Struct>

=item L<Treex::PML::Schema::Container>

=item L<Treex::PML::Schema::Seq>

=item L<Treex::PML::Schema::List>

=item L<Treex::PML::Schema::Alt>

=item L<Treex::PML::Schema::Choice>

=item L<Treex::PML::Schema::CDATA>

=item L<Treex::PML::Schema::Constant>

=item L<Treex::PML::Schema::Member>

=item L<Treex::PML::Schema::Element>

=item L<Treex::PML::Schema::Attribute>

=back



( run in 0.663 second using v1.01-cache-2.11-cpan-524268b4103 )