Text-Corpus-NewYorkTimes
view release on metacpan or search on metacpan
lib/Text/Corpus/NewYorkTimes/Document.pm view on Meta::CPAN
sub getCategoriesUncontrolled
{
my $Self = shift;
# return the categories if previously computed.
return $Self->{categories_uncontrolled} if exists $Self->{categories_uncontrolled};
# Below is the list of XPath expressions that select all the hand-assigned categories.
my @xpathExpressions =
(
'/nitf/head/docdata/identified-content/classifier[@class="indexing_service" and @type="biographical_categories"]', # 2.2.3 biographic categories
'/nitf/head/docdata/identified-content/classifier[@class="indexing_service" and @type="descriptor"]', # 2.2.15 descriptors
'/nitf/head/docdata/identified-content/location[@class="indexing_service"]', # 2.2.22 locations
'/nitf/head/docdata/identified-content/classifier[@class="indexing_service" and @type="names"]', # 2.2.23 names
'/nitf/head/docdata/identified-content/org[@class="indexing_service"]', # 2.2.34 organizations
'/nitf/head/docdata/identified-content/person[@class="indexing_service"]', # 2.2.36 people
'/nitf/head/docdata/identified-content/object.title[@class="indexing_service"]', # 2.2.45 titles
);
# get the categories.
my @categories;
foreach my $xpathExpression (@xpathExpressions)
{
my @nodeset = $Self->{xpathEngine}->findnodes($xpathExpression);
foreach my $node (@nodeset)
{
push @categories, $node->textContent();
lib/Text/Corpus/NewYorkTimes/Document.pm view on Meta::CPAN
sub getCategoriesControlled
{
my $Self = shift;
# return the categories if previously computed.
return $Self->{categories_controlled} if exists $Self->{categories_controlled};
# Below is the list of XPath expressions that select all the hand-assigned categories.
my @xpathExpressions =
(
'/nitf/head/docdata/identified-content/classifier[@class="online_producer" and @type="general_descriptor"]', # 2.2.17 general online descriptors
'/nitf/head/docdata/identified-content/classifier[@class="online_producer" and @type="descriptor"]', # 2.2.26 online descriptors
'/nitf/head/docdata/identified-content/location[@class="online_producer"]', # 2.2.29 online locations
'/nitf/head/docdata/identified-content/org[@class="online_producer"]', # 2.2.30 online organizations
'/nitf/head/docdata/identified-content/person[@class="online_producer"]', # 2.2.31 online people
'/nitf/head/docdata/identified-content/object.title[@class="online_producer"]', # 2.2.33 online titles
);
# get the categories.
my @categories;
foreach my $xpathExpression (@xpathExpressions)
{
my @nodeset = $Self->{xpathEngine}->findnodes($xpathExpression);
foreach my $node (@nodeset)
{
push @categories, $node->textContent();
( run in 0.262 second using v1.01-cache-2.11-cpan-454fe037f31 )