Alvis-Convert

 view release on metacpan or  search on metacpan

bin/html2alvis  view on Meta::CPAN

# Defaults for the command-line options handled below; GetOptions overwrites
# them through the references it is handed.
my $ODir                 = '.';
my $NPerOurDir           = 1000;
my $MetaEncoding         = 'iso-8859-1';
my $HTMLEncoding         = undef;
my $HTMLEncodingFromMeta = 0;
my $IncOrigDoc           = 1;

# Parse the command line.  When GetOptions reports a problem, the usage
# text is requested through pod2usage(2).
if (!GetOptions(
        'help|?'                   => \$PrintHelp,
        'man'                      => \$PrintManual,
        'warnings!'                => \$Warnings,
        'html-ext=s'               => \$HTMLSuffix,
        'meta-ext=s'               => \$MetaSuffix,
        'out-dir=s'                => \$ODir,
        'N-per-out-dir=s'          => \$NPerOurDir,
        'meta-encoding=s'          => \$MetaEncoding,
        'html-encoding=s'          => \$HTMLEncoding,
        'html-encoding-from-meta!' => \$HTMLEncodingFromMeta,
        'original!'                => \$IncOrigDoc,
    ))
{
    pod2usage(2);
}

if ($PrintHelp)
{
    pod2usage(1);
}
if ($PrintManual)
{
    pod2usage(-exitstatus => 0, -verbose => 2);
}

# Exactly one positional argument must be left: the source directory.
if (@ARGV != 1)
{
    pod2usage(1);
}

my $SDir = shift @ARGV;

# Autoflush the currently selected output handle.
$| = 1;

# The converter object; the subs below reach it through this file-level
# lexical.
my $C = Alvis::Convert->new(
    outputRootDir           => $ODir,
    outputNPerSubdir        => $NPerOurDir,
    outputAtSameLocation    => 0,
    metaEncoding            => $MetaEncoding,
    sourceEncoding          => $HTMLEncoding,
    includeOriginalDocument => $IncOrigDoc,
    sourceEncodingFromMeta  => $HTMLEncodingFromMeta,
);

# Paths already visited by _parse_entries (reset by _convert_collection).
my %Seen;

$C->init_output();

# Convert everything under the source directory; a false return is fatal
# and the converter's error message is appended to the die text.
_convert_collection($SDir,
                    {
                        htmlSuffix => $HTMLSuffix,
                        metaSuffix => $MetaSuffix,
                    })
    or die("Conversion failed. " . $C->errmsg());


# Recursively walks a list of filesystem entries and records, for every
# basename (the path minus its final ".suffix"), which meta file and which
# HTML file were found.
#
# Arguments:
#   $entries      - array ref of paths to examine
#   $options      - hash ref; the keys "metaSuffix" and "htmlSuffix" are read
#   $html_entries - hash ref that is filled in place:
#                     $html_entries->{$basename}{metaF} = path of meta file
#                     $html_entries->{$basename}{htmlF} = path of HTML file
#
# Reads and updates the file-level %Seen so that no path is handled twice,
# and consults the file-level $Warnings before warning about entries that
# have no suffix.  Files whose suffix matches neither option are ignored
# silently.  The return value is not meaningful.
sub _parse_entries
{
    my $entries=shift;
    my $options=shift;
    my $html_entries=shift;

    # The built-in glob() splits its argument on whitespace, so a
    # directory path containing a space would be expanded as several
    # unrelated patterns.  bsd_glob() treats the whole string as a single
    # pattern and otherwise uses the same default flags.
    require File::Glob;

    for my $e (@$entries)
    {
        if ($Seen{$e})
        {
            next;
        }

        $Seen{$e}=1;
        if (-d $e)
        {
            # Descend into the directory; like glob(), the "*" pattern
            # does not return names starting with a dot.
            my @children=File::Glob::bsd_glob("$e/*");
            _parse_entries(\@children,$options,$html_entries);
            next;
        }

        my ($basename,$suffix);
        # The suffix must not contain a "/": otherwise a dot inside a
        # directory name (e.g. "v1.0/README") would be taken as the start
        # of the file's suffix and the entry would be dropped without the
        # warning below.
        if ($e=~/^(.*)\.([^\.\/]+)$/)
        {
            $basename=$1;
            $suffix=$2;
        }
        else
        {
            warn "Skipping non-suffixed non-directory entry \"$e\"." if
                $Warnings;
            next;
        }

        if ($suffix eq $options->{metaSuffix})
        {
            $html_entries->{$basename}{metaF}=$e;
        }
        elsif ($suffix eq $options->{htmlSuffix})
        {
            $html_entries->{$basename}{htmlF}=$e;
        }
    }
}

sub _convert_collection
{
    my $root_dir=shift;
    my $options=shift;

    my @entries=glob("$root_dir/*");
    my %html_entries=();
    %Seen=();
    print "Parsing the source directory entries...\r";
    &_parse_entries(\@entries,$options,\%html_entries);	
    print "                                       \r";

    for my $base_name (keys %html_entries)
    {
	my ($meta_txt,$html_txt);

	if (exists($html_entries{$base_name}{metaF}))
	{
	    $meta_txt=$C->read_meta($html_entries{$base_name}{metaF});
	    if (!defined($meta_txt))
	    {
		warn "Reading meta file " .
		    "\"$html_entries{$base_name}{metaF}\" failed. " .
		    $C->errmsg() if 
		    $Warnings;
		$C->clearerr();
		next;
	    }
	}
	else # no meta file
	{
	    warn "No Meta file for basename \"$base_name\"." if 
		$Warnings;
	    next;
	}

	my $alvisXML;
	
	if (exists($html_entries{$base_name}{htmlF}))
	{
	    $html_txt=$C->read_HTML($html_entries{$base_name}{htmlF},
                                    $meta_txt);
	    if (!defined($html_txt))
	    {
		warn "Reading the HTML for basename \"$base_name\" failed. " .
		    $C->errmsg() if 
		    $Warnings;
		$C->clearerr();
		next;
	    }
	}
	else
	{
	     warn "No HTML file for basename \"$base_name\"." if 
		$Warnings;
	     next;
	}

	$alvisXML=$C->HTML($html_txt,$meta_txt);
	if (!defined($alvisXML))
	{
	    warn "Obtaining the Alvis version of the " .
		"\"$base_name\"'s HTML file failed. " . $C->errmsg() if 
		$Warnings;
	    $C->clearerr();
	    next;



( run in 0.437 second using v1.01-cache-2.11-cpan-39bf76dae61 )