Text-Distill
view release on metacpan or search on metacpan
lib/Text/Distill.pm view on Meta::CPAN
my $ContentPath = $ContainerNode->getAttributeNode('full-path')->string_value;
if (my $ContentMember = $arch->memberNamed( $ContentPath )) {
my $XMLContent = $ContentMember->contents();
$xpc->unregisterNs('opf');
$xpc->registerNs('opf', 'http://www.idpf.org/2007/opf');
my $Content;
eval { $Content = $xml->parse_string($XMLContent); };
if ($@) {
$! = 11;
Carp::confess("[libxml2 error ". $@->code() ."] ". $@->message());
}
my @ContentNodes = $xpc->findnodes('//opf:package/opf:manifest/opf:item[
@media-type="application/xhtml+xml"
and
starts-with(@id, "content")
]',
$Content
);
my $HTMLTree = HTML::TreeBuilder->new();
foreach my $ContentNode (@ContentNodes) {
my $HTMLContentPath = $ContentNode->getAttributeNode('href')->string_value;
if (my $HTMLContentMember = $arch->memberNamed( $HTMLContentPath )) {
my $HTMLContent = $HTMLContentMember->contents();
$HTMLTree->parse_content($HTMLContent);
} else {
Carp::confess("[Archive::Zip error] $HTMLContentPath not found in ePub ZIP container");
}
}
$Result = DecodeUtf8($HTMLTree->as_text);
} else {
Carp::confess("[Archive::Zip error] $ContentPath not found in ePub ZIP container");
}
} else {
Carp::confess("[Archive::Zip error] $requiredMember not found in ePub ZIP container");
}
} else {
Carp::confess("[Archive::Zip error] $!");
}
return $Result;
}
sub OPCPartAbsoluteNameFromRelative {
my $Name = shift;
my $Dir = shift;
$Dir =~ s:/$::; # remove trailing slash
my $FullName = ( $Name =~ m:^/: ) ? $Name : # $Name has absolute path
"$Dir/$Name"; # $Name has relative path
$FullName = do{
use bytes; # A-Za-z are case insensitive
lc $FullName;
};
# parse all . and .. in name
my @CleanedSegments;
my @OriginalSegments = split m:/:, $FullName;
for my $Part ( @OriginalSegments ) {
if( $Part eq '.' ) {
# just skip
} elsif( $Part eq '..' ) {
pop @CleanedSegments;
} else {
push @CleanedSegments, $Part;
}
}
return join '/', @CleanedSegments;
}
sub ExtractSingleZipFile {
my $FN = shift;
my $Ext = shift;
my $Zip = Archive::Zip->new();
return unless ( $Zip->read( $FN ) == Archive::Zip::AZ_OK );
my @Files = $Zip->members();
return unless (scalar @Files == 1 && $Files[0]->{fileName} =~ /(\.$Ext)$/);
my $TmpDir = File::Temp::tempdir(cleanup=>1);
my $OutFile = $TmpDir.'/check_' . $$ . '_' . $Files[0]->{fileName};
return $Zip->extractMember( $Files[0], $OutFile ) == Archive::Zip::AZ_OK ? $OutFile : undef;
}
=head2 DetectBookFormat($FilePath, $Format)
Function detects format of an e-book and returns it. You
may suggest the format to start with, this wiil speed up the process a bit
(not required).
$Format can be 'fb2.zip', 'fb2', 'doc.zip', 'doc', 'docx.zip',
'docx', 'epub.zip', 'epub', 'txt.zip', 'txt', 'fb3', 'fb3'
=cut
sub DetectBookFormat {
my $File = shift;
my $Format = shift;
if (defined $Format && $Format =~/^($rxFormats)$/) {
$Format = $1;
} else {
$Format = '';
}
#$Format пеÑвÑм пойдеÑ
my @Formats = ($Format || (), grep{ $_ ne $Format } @DetectionOrder);
foreach( @Formats ) {
return $_ if $Detectors->{$_}->($File);
}
return;
}
( run in 0.882 second using v1.01-cache-2.11-cpan-71847e10f99 )