File-Information
view release on metacpan or search on metacpan
lib/File/Information/Deep.pm view on Meta::CPAN
# Copyright (c) 2025 Philipp Schafft <lion@cpan.org>
# licensed under Artistic License 2.0 (see LICENSE file)
# ABSTRACT: generic module for extracting information from filesystems
package File::Information::Deep;
use v5.20;
use strict;
use warnings;
use parent 'File::Information::Base';
use Carp;
use Scalar::Util qw(weaken);
use Fcntl qw(SEEK_SET);
our $VERSION = v0.16;
my %_PNG_colour_types = ( # namespace: 4c11d438-f6f3-417f-85e3-e56e46851dae
0 => {ise => 'a3934b85-5bec-5cd7-a571-727e4cecfcb1', displayname => 'Greyscale'},
2 => {ise => '56262598-1d35-566d-b9a3-0e752d58b8ce', displayname => 'Truecolor'},
3 => {ise => '67f61b65-4978-510b-b318-247da7934837', displayname => 'Indexed-color'},
4 => {ise => 'cbdafa4e-1cb8-59a9-b6ec-b7a1bef3fcd4', displayname => 'Greyscale with alpha'},
6 => {ise => 'c6ef9ba0-3b7f-5248-a4f4-18e39c14d7b3', displayname => 'Truecolor with alpha'},
);
my %_PNG_filter_method = ( # namespace: 06f15860-8191-41f5-881c-a465be563089
0 => {ise => 'b7a197cb-2eee-517f-ae57-8e299d1a92e9', displayname => 'None'},
1 => {ise => 'c194fcce-c957-5436-861f-09af8526fed8', displayname => 'Sub'},
2 => {ise => 'fe14ef2d-4098-5a7e-81a0-88beae0e1e65', displayname => 'Up'},
3 => {ise => 'b0df25b4-b1fb-52cc-a6be-162440bd9628', displayname => 'Average'},
4 => {ise => '974cf00a-c2e2-5d08-b1da-08169e09b173', displayname => 'Paeth'},
);
my %_PNG_compression_method = ( # namespace: b2b8b4bf-3b0f-4037-9bbc-96e6b53ae73d
0 => {ise => 'f47c8ff3-5218-555d-bf89-ba30706c29e1', displayname => 'deflate'},
);
my %_vmv0_section_types = (
1 => {ise => 'bc0dc85a-8c72-5ab6-a60b-377fdf76f678', displayname => 'init'},
2 => {ise => '18b7bfe0-5e3a-5fe4-ad69-a317e6b2445c', displayname => 'header'},
3 => {ise => '5460c878-23d6-56b9-8600-9375d76fefc5', displayname => 'rodata'},
4 => {ise => '0520d8d6-3a85-56d2-ae2b-77c517cff2ce', displayname => 'text'},
5 => {ise => '95f7f330-a72d-5e0b-ab0f-d46f37edbc9a', displayname => 'trailer'},
6 => {ise => '9bbc79eb-5a31-5797-8a05-56e58c530289', displayname => 'resources'},
);
# Extra tags that do not belong into one of the other lists.
my %_wk = (
'.section' => {ise => 'dad2de0d-9711-5b57-9a31-562122d756ba', displayname => '.section'},
'.chunk' => {ise => 'bff479fa-a818-58dc-b5df-539852fa8b80', displayname => '.chunk'},
);
my %_properties = (
pdf_version => {loader => \&_load_pdf},
pdf_pages => {loader => \&_load_pdf},
odf_keywords => {loader => \&_load_odf},
data_uriid_barcodes => {loader => \&_load_barcodes, rawtype => 'Data::URIID::Barcode'},
vmv0_filesize => {loader => \&_load_vmv0},
vmv0_section_pointer => {loader => \&_load_vmv0},
vmv0_section => {loader => \&_load_vmv0, rawtype => 'File::Information::Chunk'},
vmv0_minimum_handles => {loader => \&_load_vmv0},
vmv0_minimum_ram => {loader => \&_load_vmv0},
vmv0_boundary_text => {loader => \&_load_vmv0},
vmv0_boundary_load => {loader => \&_load_vmv0},
png_ihdr_width => {loader => \&_load_png},
png_ihdr_height => {loader => \&_load_png},
png_ihdr_bit_depth => {loader => \&_load_png},
png_ihdr_color_type => {loader => \&_load_png},
png_ihdr_compression_method => {loader => \&_load_png},
png_ihdr_filter_method => {loader => \&_load_png},
png_ihdr_interlace_method => {loader => \&_load_png},
gif_screen_width => {loader => \&_load_gif},
gif_screen_height => {loader => \&_load_gif},
lib/File/Information/Deep.pm view on Meta::CPAN
}
foreach my $keyword (qw(CreationDate ModDate)) {
$_properties{'pdf_info_'.lc($keyword)}{parsing} = 'pdf_date';
}
foreach my $key (@_odf_medadata_keys) {
$_properties{'odf_info_'.$key} = {loader => \&_load_odf};
}
foreach my $key (qw(creation_date date)) {
$_properties{'odf_info_'.$key}{parsing} = 'iso8601';
}
foreach my $key (@_image_info_keys) {
$_properties{'image_info_'.lc($key)} = {loader => \&_load_image_info};
}
foreach my $key (@_image_extra_keys) {
$_properties{'image_info_extra_'.lc($key =~ s/::/_/r)} = {loader => \&_load_image_info};
}
$_properties{image_info_extra_thumb_mtime}{rawtype} = 'unixts';
$_properties{image_info_extra_thumb_uri}{rawtype} = 'uri';
# Register well known:
foreach my $value (
values(%_PNG_colour_types),
values(%_PNG_filter_method),
values(%_PNG_compression_method),
values(%_vmv0_section_types),
values(%_wk),
) {
Data::Identifier->new(ise => $value->{ise}, displayname => $value->{displayname})->register;
}
#@returns File::Information::Base
sub parent {
my ($self) = @_;
return $self->{parent};
}
# ----------------
sub property_info {
my ($self, @args) = @_;
unless (defined $self->{_dynamic}) {
$self->{_dynamic} = 1;
foreach my $cb (@_dynamic_loaders) {
$self->$cb('__dummy__');
}
}
return $self->SUPER::property_info(@args);
}
sub _new {
my ($pkg, %opts) = @_;
my $self = $pkg->SUPER::_new(%opts, properties => \%_properties);
my $pv = ($self->{properties_values} //= {})->{current} //= {};
my $parent = $self->{parent};
weaken($self->{parent});
# copy a few critical values:
$pv->{contentise} = {raw => $parent->get('contentise', lifecycle => 'current', as => 'uuid')};
eval { $pv->{mediatype} = {raw => $parent->get('mediatype', lifecycle => 'current', as => 'mediatype')} };
return $self;
}
sub _dynamic_property {
my ($self, $prefix, $property) = @_;
my $key;
$property = lc($property);
$property =~ s/::/_/g;
$property =~ s/[^a-z0-9]/_/g;
$_properties{$key = $prefix.'_'.$property} //= {};
return $key;
}
sub _check_mediatype {
my ($self, @mediasubtypes) = @_;
my $v;
return undef unless defined $self->{properties_values}{current}{mediatype}{raw};
$v = $self->{properties_values}{current}{mediatype}{raw};
foreach my $mediasubtype (@mediasubtypes) {
return 1 if $v eq $mediasubtype;
}
return undef;
}
sub _pdf_extract_date {
my ($self, $value) = @_;
require DateTime::Format::Strptime;
state $pdf_date_core_pattern = '%Y%m%d%H%M%S';
state $pdf_date_format_0 = DateTime::Format::Strptime->new('pattern' => $pdf_date_core_pattern, 'time_zone' => 'UTC');
my $dt;
my $core;
my $parser;
# General format: D:YYYYMMDDHHmmSSOHH'mm'
if (($core) = $value =~ /^D:([0-9]{14})Z'{0,2}$/) {
$parser = $pdf_date_format_0;
} elsif (my ($mycore, $tz_dir, $tz_h, $tz_m) = $value =~ /^D:([0-9]{14})(\+|\-)([0-9]{2})'([0-9]{2})'$/) {
my $tz = sprintf('%s%s%s', $tz_dir, $tz_h, $tz_m);
$core = $mycore;
$parser = DateTime::Format::Strptime->new('pattern' => $pdf_date_core_pattern, 'time_zone' => $tz);
}
return undef unless defined($core) && defined($parser);
return $parser->parse_datetime($core);
}
( run in 0.677 second using v1.01-cache-2.11-cpan-39bf76dae61 )