NLP-GATE
view release on metacpan or search on metacpan
lib/NLP/GATE/Document.pm view on Meta::CPAN
## Rebuild the plain document text from /GateDocument/TextWithNodes and
## remember, for every element found among its children, the offset at which
## that element occurs in the text.
##   $text     accumulates the text/cdata content in document order
##   $offset   is the running length of $text (as reported by length())
##   %nodemap  maps the element's "id" attribute value => offset
my $offset = 0;
my $text = "";
for my $container ($doc->findnodes("/GateDocument/TextWithNodes")) {
    for my $child ($container->childNodes()) {
        my $type = $child->nodeType();

        ## type 1: element node -- contributes no text, it only marks the
        ## current position under its id
        if($type == 1) {
            my $id = _getAttr($child,"id");
            $nodemap{$id} = $offset;
            next;
        }

        ## only text (3) and cdata (4) nodes are acceptable from here on;
        ## any other node type aborts the load
        croak "Invalid node type encountered: ",$type,"\n"
            unless $type == 3 || $type == 4;

        my $chunk = $child->textContent();
        $text .= $chunk;
        $offset += length($chunk);
    }
}
## keep the reconstructed text on the object
$self->{text} = $text;
## process the annotation features, replacing node ids with offset information
( run in 0.356 second using v1.01-cache-2.11-cpan-454fe037f31 )