DTA-CAB

 view release on metacpan or  search on metacpan

CAB/Analyzer.pm  view on Meta::CPAN

}

## PACKAGE::_am_tagh_fst2moota($taghvar='$_')
##  + access-closure macro (EXPR): single moot token analysis from TAGH-style fst analysis
##  + requires: $$taghvar->{hi}; evaluates to:
##    {details=>$$taghvar->{hi}, prob=>($$taghvar->{w}||0), tag=>($$taghvar->{hi} =~ /\[\_?((?:[A-Za-z0-9.]+|\$[^\]]+))\]/ ? $1 : $$taghvar->{hi})}
sub _am_tagh_fst2moota {
  ##-- $taghvar: source text of the variable expression holding one analysis (default: '$_')
  my $taghvar = shift || '$_';

  ##-- source-text fragments of the generated expression
  my $hi_expr = $taghvar.'->{hi}';
  my $w_expr  = $taghvar.'->{w}';

  ##-- tag-extraction pattern (as source text): content of the first "[TAG]" or "[_TAG]" group;
  ##   the '\$...' alternative allows e.g. [$(] tags from tokenizer!
  my $tag_re  = '/\[\_?((?:[A-Za-z0-9.]+|\$[^\]]+))\]/';

  ##-- returns: perl source (EXPR) for an anonymous hash {details=>..., prob=>..., tag=>...},
  ##   terminated by a trailing marker comment and newline
  return join('',
	      "{details=>${hi_expr},",
	      " prob=>(${w_expr}||0),",
	      " tag=>(${hi_expr} =~ ${tag_re} ? \$1 : ${hi_expr})",
	      "} ##-- _am_tagh_fst2moota\n",
	     );
}

## PACKAGE::_am_tagh_list2moota($listvar='@{$_->{morph}}')
##  + access-closure macro (EXPR): moot token analysis-list from TAGH-style fst analysis-list
##  + evaluates to (something like):
##    (map { $${_am_tagh_fst2moota('$_')} } $$listvar)
sub _am_tagh_list2moota {
  my $listvar = shift||'@{$_->{morph}}';
  #return "(map {ref(\$_) ? "._am_tagh_fst2moota('$_')." : {details=>\$_,tag=>\$_,prob=>0}} $listvar) ##-- _am_tagh_list2moota\n";

CAB/Analyzer/Moot.pm  view on Meta::CPAN

    $msent = [map {
      $w  = $_;
      $mw = $w->{$lab} = $w->{$lab} ? {%{$w->{$lab}}} : ($w->{$lab}={}); ##-- copy $w->{moot} if present
      $mw->{text} = (defined($mw->{word}) ? $mw->{word} : (($use_dmoot ? $_->{dmoot} : undef) ? ($use_dmoot ? $_->{dmoot} : undef)->{tag} : ($_->{xlit} ? $_->{xlit}{latin1Text} : $_->{text}) ##== _am_xlit
							  ) ##== _am_tag
		    ) if (!defined($mw->{text}));
      $mw->{text} = lc($mw->{text}) if ($lctext);
      $mw->{analyses} = [{tag=>"NE",details=>"NE.xp",prob=>0}] if ($xpne && ($w->{xp}//"") =~ /\b((?:pers)Name)\b/i); #place
      $mw->{analyses} = [{tag=>$fmtag,details=>"$fmtag.xp",prob=>0}] if ($xpfm && ($w->{xp}//"") =~ /\bforeign\b/i);
      $val = undef;	      ##-- temporary for _am_tagh_moota_uniq()
      $mw->{analyses} = [(map {$val && $val->{details} eq $_->{details} ? qw() : ($val=$_)} sort {($a->{details}//"") cmp ($b->{details}//"") || ($a->{prob}//0) <=> ($b->{prob}//0)} (map {{details=>$_->{hi}, prob=>($_->{w}||0), tag=>($_->{hi} =~ /\[\...
																							  } map {ref($_) ? $_ : {hi=>$_}} map {$_ ? @$_ : qw()}
																						       @$w{qw(mlatin tokpp toka)},
																						       ($use_dmoot && $w->{xlit} && !$w->{xlit}{isLatinExt} ? [$fmtag, "XY"] : qw()),
																						       ($use_dmoot && $w->{dmoot} ? $w->{dmoot}{morph}
																							: ($w->{morph}, ($w->{rw} ? (map {$_->{morph}} @{$w->{rw}}) : qw())))) ##-- _am_tagh_list2moota
			 )	##== _am_tagh_moota_uniq

			] if (!defined($mw->{analyses}));
      foreach (@{$mw->{analyses}}) {
	##-- tag-translation hack: apply BEFORE sending to moot!

CAB/Format/TT.pm  view on Meta::CPAN

  my ($toks,%sa);
  my $sents =
    [
     map {
       %sa=qw();
       $toks=
	 [
	  map {
	    if ($_ =~ /^\%\%(.*)$/) {
	      ##-- generic line: add to '_cmts' attribute of current sentence
	      push(@{$sa{_cmts}},$1) if ($1 !~ /^\$[WS]B\$$/); ##-- generic comment, treated as sentence attribute
	      qw()
	    } elsif ($_ =~ /^$/) {
	      ##-- blank line: ignore
	      qw()
	    } elsif (/^([^\t]*)\t([0-9]+) ([0-9]+)(?:\t(.*))?$/) {
	      ##-- token
	      {text=>$1,
		 #loc=>{off=>$2,len=>$3},
		 ($tloc ? ($tloc=>"$2 $3") : qw()),
		 ($4    ? (toka=>[map {/^\[(.*)\]$/ ? $1 : $_} split(/\t/,$4)]) : qw())



( run in 0.414 second using v1.01-cache-2.11-cpan-131fc08a04b )