DTA-CAB
view release on metacpan or search on metacpan
CAB/Analyzer.pm view on Meta::CPAN
}
## PACKAGE::_am_tagh_fst2moota($taghvar='$_')
## + access-closure macro (EXPR): single moot token analysis from TAGH-style fst analysis
## + requires: $$taghvar->{hi}; evaluates to:
## {details=>$$taghvar->{hi}, prob=>($$taghvar->{w}||0), tag=>($$taghvar->{hi} =~ /\[\_?((?:[A-Za-z0-9.]+|\$[^\]]+))\]/ ? $1 : $$taghvar->{hi})}
sub _am_tagh_fst2moota {
  ##-- $taghvar: perl source text naming the analysis variable (default: '$_');
  ##   it is spliced verbatim into the generated code, never evaluated here
  my $taghvar = shift || '$_';

  ##-- source fragments for the accessors used in the generated expression
  my $hi_expr = $taghvar.'->{hi}';
  my $w_expr  = $taghvar.'->{w}';

  ##-- tag-extraction regex (as source text): captures the first bracketed label,
  ##   optionally preceded by '_', consisting either of [A-Za-z0-9.]+ or of
  ##   '$' followed by non-']' characters (allows e.g. [$(] tags from tokenizer!)
  my $tag_re = '/\[\_?((?:[A-Za-z0-9.]+|\$[^\]]+))\]/';

  ##-- returns a code string which evaluates to a HASH-ref {details=>..., prob=>..., tag=>...};
  ##   the 'tag' value falls back to the whole {hi} string if the regex does not match
  return join('',
	      "{details=>$hi_expr,",
	      " prob=>($w_expr||0),",
	      " tag=>($hi_expr =~ $tag_re ? \$1 : $hi_expr)",
	      "} ##-- _am_tagh_fst2moota\n",
	     );
}
## PACKAGE::_am_tagh_list2moota($listvar='@{$_->{morph}}')
## + access-closure macro (EXPR): moot token analysis-list from TAGH-style fst analysis-list
## + evaluates to (something like):
## (map { $${_am_tagh_fst2moota('$_')} } $$listvar)
sub _am_tagh_list2moota {
my $listvar = shift||'@{$_->{morph}}';
#return "(map {ref(\$_) ? "._am_tagh_fst2moota('$_')." : {details=>\$_,tag=>\$_,prob=>0}} $listvar) ##-- _am_tagh_list2moota\n";
CAB/Analyzer/Moot.pm view on Meta::CPAN
$msent = [map {
$w = $_;
$mw = $w->{$lab} = $w->{$lab} ? {%{$w->{$lab}}} : ($w->{$lab}={}); ##-- copy $w->{moot} if present
$mw->{text} = (defined($mw->{word}) ? $mw->{word} : (($use_dmoot ? $_->{dmoot} : undef) ? ($use_dmoot ? $_->{dmoot} : undef)->{tag} : ($_->{xlit} ? $_->{xlit}{latin1Text} : $_->{text}) ##== _am_xlit
) ##== _am_tag
) if (!defined($mw->{text}));
$mw->{text} = lc($mw->{text}) if ($lctext);
$mw->{analyses} = [{tag=>"NE",details=>"NE.xp",prob=>0}] if ($xpne && ($w->{xp}//"") =~ /\b((?:pers)Name)\b/i); #place
$mw->{analyses} = [{tag=>$fmtag,details=>"$fmtag.xp",prob=>0}] if ($xpfm && ($w->{xp}//"") =~ /\bforeign\b/i);
$val = undef; ##-- temporary for _am_tagh_moota_uniq()
$mw->{analyses} = [(map {$val && $val->{details} eq $_->{details} ? qw() : ($val=$_)} sort {($a->{details}//"") cmp ($b->{details}//"") || ($a->{prob}//0) <=> ($b->{prob}//0)} (map {{details=>$_->{hi}, prob=>($_->{w}||0), tag=>($_->{hi} =~ /\[\...
} map {ref($_) ? $_ : {hi=>$_}} map {$_ ? @$_ : qw()}
@$w{qw(mlatin tokpp toka)},
($use_dmoot && $w->{xlit} && !$w->{xlit}{isLatinExt} ? [$fmtag, "XY"] : qw()),
($use_dmoot && $w->{dmoot} ? $w->{dmoot}{morph}
: ($w->{morph}, ($w->{rw} ? (map {$_->{morph}} @{$w->{rw}}) : qw())))) ##-- _am_tagh_list2moota
) ##== _am_tagh_moota_uniq
] if (!defined($mw->{analyses}));
foreach (@{$mw->{analyses}}) {
##-- tag-translation hack: apply BEFORE sending to moot!
CAB/Format/TT.pm view on Meta::CPAN
my ($toks,%sa);
my $sents =
[
map {
%sa=qw();
$toks=
[
map {
if ($_ =~ /^\%\%(.*)$/) {
##-- generic line: add to '_cmts' attribute of current sentence
push(@{$sa{_cmts}},$1) if ($1 !~ /^\$[WS]B\$$/); ##-- generic comment, treated as sentence attribute
qw()
} elsif ($_ =~ /^$/) {
##-- blank line: ignore
qw()
} elsif (/^([^\t]*)\t([0-9]+) ([0-9]+)(?:\t(.*))?$/) {
##-- token
{text=>$1,
#loc=>{off=>$2,len=>$3},
($tloc ? ($tloc=>"$2 $3") : qw()),
($4 ? (toka=>[map {/^\[(.*)\]$/ ? $1 : $_} split(/\t/,$4)]) : qw())
( run in 0.249 second using v1.01-cache-2.11-cpan-cc502c75498 )