Alvis-Convert

 view release on metacpan or  search on metacpan

bin/alvis_wikipedia_add_cats  view on Meta::CPAN

    die("Instantiating Alvis::Wikipedia::WikitextParser failed.\n");
}
my $G=Alvis::Wikipedia::CatGraph->new(method=>$Method,
				      root=>$Root);
if (!defined($G))
{
    die("Instantiating Alvis::Wikipedia::CatGraph failed.\n");
}

print STDERR "Loading the graph....\r";
if (!$G->load_graph($DumpF))
{
    die("Loading the graph dump failed: " . $G->errmsg());
}
print STDERR "\n";

my $List;
if ($ListF)
{
    print STDERR "Getting the list of categories....\r";
    open(L,"<:utf8",$ListF) || die("Unable to open \"$ListF\"");;
    while (my $l=<L>)
    {
	chomp $l;
	push(@$List,$l);
    }
    close(L);
    print STDERR "\n";
}

print STDERR "Building the path length map....\r";
if (!$G->build_path_length_map($List))
{
    die("Building the path length map failed. " . 
	$G->errmsg());
}
print STDERR "\n";

system("mkdir -p $OutDir");
if (!&_add_cats_to_collection($Dir,{alvisSuffix=>$Suffix}))
{
    die("Adding categories to the collection failed. " . $G->errmsg());
}

sub _parse_entries
{
    my $entries=shift;
    my $options=shift;
    my $alvis_entries=shift;
    
    for my $e (@$entries)
    {
	if ($Seen{$e})
	{
	    next;
	}
	
	$Seen{$e}=1;
	if (-d $e)
	{
	    my @entries=glob("$e/*");;
	    &_parse_entries(\@entries,$options,$alvis_entries);
	    next;
	}

	my ($basename,$suffix);
	if ($e=~/^(.*)\.([^\.]+)$/)
	{
	    $basename=$1;
	    $suffix=$2;
	}
	else
	{
	    warn "Skipping non-suffixed non-directory entry \"$e\"." if 
		$Warnings;
	    next;
	}
	
	if ($suffix eq $options->{alvisSuffix})
	{
	    $alvis_entries->{$basename}{alvisF}=$e;
	}
    }
}

sub _add_cats_to_collection
{
    my $root_dir=shift;
    my $options=shift;

    my @entries=glob("$root_dir/*");
    my %alvis_entries=();
    %Seen=();
    print "Parsing the source directory entries...\r";
    &_parse_entries(\@entries,$options,\%alvis_entries);	
    print "                                       \r";

    for my $base_name (keys %alvis_entries)
    {
	my $alvisXML;
	
	if (exists($alvis_entries{$base_name}{alvisF}))
	{
	    my $f=$alvis_entries{$base_name}{alvisF};
	    open(W,"<:utf8",$f) || die("Unable to open \"$f\"");
	    my $out=$f;
	    $out=~s/^.*\///sgo;
	    open(OUT,">:utf8","$OutDir/$out") || 
		die("Unable to open \"$OutDir/$out\"");

	    my $new_rec="";
	    my $N=1;
	    while (my $record=&_get_next_rec(*W))
	    {
		my $cats=&_get_cats($record);
		if (!defined($cats))
		{
		    warn "Getting the categories of record #$N in file " .
			"\"$f\" failed."; 
		    next;
		}

		$Scores=$G->get_relative_scores($cats);
		if (!defined($Scores))
		{
		    warn "Getting the relative scores of record #$N in file " .
			"\"$f\" failed."; 
		    next;
		}
		
		$new_rec=&_output_rec_with_new_scores($record,$Scores);
		if (!defined($new_rec))
		{
		     warn "Getting the new, category-added version " .
			 "of record #$N in file " .
			 "\"$f\" failed."; 
		    next;
		}

		print OUT $new_rec;
		print STDERR "$N\r";
		$N++;
	    }
	    close(W);
	    print STDERR "\n";
	    close(OUT);
	}
    }

    return 1;
}



( run in 1.220 second using v1.01-cache-2.11-cpan-99c4e6809bf )