Text-Corpus-NewYorkTimes

 view release on metacpan or  search on metacpan

lib/Text/Corpus/NewYorkTimes/Document.pm  view on Meta::CPAN

sub getCategoriesUncontrolled
{
  my $Self = shift;

  # return the categories if previously computed.
  return $Self->{categories_uncontrolled} if exists $Self->{categories_uncontrolled};

  # below is a list of the xpath expressions to get all the hand assigned categories.
  my @xpathExpressions =
  (
  '/nitf/head/docdata/identified-content/classifier[@class="indexing_service" and @type="biographical_categories"]', # 2.2.3 biographic categories
  '/nitf/head/docdata/identified-content/classifier[@class="indexing_service" and @type="descriptor"]', # 2.2.15 descriptors
  '/nitf/head/docdata/identified-content/location[@class="indexing_service"]', # 2.2.22 locations
  '/nitf/head/docdata/identified-content/classifier[@class="indexing_service" and @type="names"]', # 2.2.23 names
  '/nitf/head/docdata/identified-content/org[@class="indexing_service"]', # 2.2.34 organizations
  '/nitf/head/docdata/identified-content/person[@class="indexing_service"]', # 2.2.36 people
  '/nitf/head/docdata/identified-content/object.title[@class="indexing_service"]', # 2.2.45 titles
  );

  # get the categories.
  my @categories;
  foreach my $xpathExpression (@xpathExpressions)
  {
    my @nodeset = $Self->{xpathEngine}->findnodes($xpathExpression);
    foreach my $node (@nodeset)
    {
      push @categories, $node->textContent();

lib/Text/Corpus/NewYorkTimes/Document.pm  view on Meta::CPAN

sub getCategoriesControlled
{
  my $Self = shift;

  # return the categories if previously computed.
  return $Self->{categories_controlled} if exists $Self->{categories_controlled};

  # below is a list of the xpath expressions to get all the hand assigned categories.
  my @xpathExpressions =
  (
  '/nitf/head/docdata/identified-content/classifier[@class="online_producer" and @type="general_descriptor"]', # 2.2.17 general online descriptors
  '/nitf/head/docdata/identified-content/classifier[@class="online_producer" and @type="descriptor"]', # 2.2.26 online descriptors
  '/nitf/head/docdata/identified-content/location[@class="online_producer"]', # 2.2.29 online locations
  '/nitf/head/docdata/identified-content/org[@class="online_producer"]', # 2.2.30 online organizations
  '/nitf/head/docdata/identified-content/person[@class="online_producer"]', # 2.2.31 online people
  '/nitf/head/docdata/identified-content/object.title[@class="online_producer"]', # 2.2.33 online titles
  );

  # get the categories.
  my @categories;
  foreach my $xpathExpression (@xpathExpressions)
  {
    my @nodeset = $Self->{xpathEngine}->findnodes($xpathExpression);
    foreach my $node (@nodeset)
    {
      push @categories, $node->textContent();



( run in 0.587 second using v1.01-cache-2.11-cpan-454fe037f31 )