File-Information

 view release on metacpan or  search on metacpan

lib/File/Information/Deep.pm  view on Meta::CPAN

# Copyright (c) 2025 Philipp Schafft <lion@cpan.org>

# licensed under Artistic License 2.0 (see LICENSE file)

# ABSTRACT: generic module for extracting information from filesystems


package File::Information::Deep;

use v5.20;
use strict;
use warnings;

use parent 'File::Information::Base';

use Carp;
use Scalar::Util qw(weaken);
use Fcntl qw(SEEK_SET);

our $VERSION = v0.16;

my %_PNG_colour_types = ( # namespace: 4c11d438-f6f3-417f-85e3-e56e46851dae
    0   => {ise => 'a3934b85-5bec-5cd7-a571-727e4cecfcb1', displayname => 'Greyscale'},
    2   => {ise => '56262598-1d35-566d-b9a3-0e752d58b8ce', displayname => 'Truecolor'},
    3   => {ise => '67f61b65-4978-510b-b318-247da7934837', displayname => 'Indexed-color'},
    4   => {ise => 'cbdafa4e-1cb8-59a9-b6ec-b7a1bef3fcd4', displayname => 'Greyscale with alpha'},
    6   => {ise => 'c6ef9ba0-3b7f-5248-a4f4-18e39c14d7b3', displayname => 'Truecolor with alpha'},
);

my %_PNG_filter_method = ( # namespace: 06f15860-8191-41f5-881c-a465be563089
    0   => {ise => 'b7a197cb-2eee-517f-ae57-8e299d1a92e9', displayname => 'None'},
    1   => {ise => 'c194fcce-c957-5436-861f-09af8526fed8', displayname => 'Sub'},
    2   => {ise => 'fe14ef2d-4098-5a7e-81a0-88beae0e1e65', displayname => 'Up'},
    3   => {ise => 'b0df25b4-b1fb-52cc-a6be-162440bd9628', displayname => 'Average'},
    4   => {ise => '974cf00a-c2e2-5d08-b1da-08169e09b173', displayname => 'Paeth'},
);

my %_PNG_compression_method = ( # namespace: b2b8b4bf-3b0f-4037-9bbc-96e6b53ae73d
    0   => {ise => 'f47c8ff3-5218-555d-bf89-ba30706c29e1', displayname => 'deflate'},
);

my %_vmv0_section_types = (
    1   => {ise => 'bc0dc85a-8c72-5ab6-a60b-377fdf76f678', displayname => 'init'},
    2   => {ise => '18b7bfe0-5e3a-5fe4-ad69-a317e6b2445c', displayname => 'header'},
    3   => {ise => '5460c878-23d6-56b9-8600-9375d76fefc5', displayname => 'rodata'},
    4   => {ise => '0520d8d6-3a85-56d2-ae2b-77c517cff2ce', displayname => 'text'},
    5   => {ise => '95f7f330-a72d-5e0b-ab0f-d46f37edbc9a', displayname => 'trailer'},
    6   => {ise => '9bbc79eb-5a31-5797-8a05-56e58c530289', displayname => 'resources'},
);

# Extra tags that do not belong into one of the other lists.
my %_wk = (
    '.section'  => {ise => 'dad2de0d-9711-5b57-9a31-562122d756ba', displayname => '.section'},
    '.chunk'    => {ise => 'bff479fa-a818-58dc-b5df-539852fa8b80', displayname => '.chunk'},
);

my %_properties = (
    pdf_version                 => {loader => \&_load_pdf},
    pdf_pages                   => {loader => \&_load_pdf},
    odf_keywords                => {loader => \&_load_odf},
    data_uriid_barcodes         => {loader => \&_load_barcodes, rawtype => 'Data::URIID::Barcode'},
    vmv0_filesize               => {loader => \&_load_vmv0},
    vmv0_section_pointer        => {loader => \&_load_vmv0},
    vmv0_section                => {loader => \&_load_vmv0, rawtype => 'File::Information::Chunk'},
    vmv0_minimum_handles        => {loader => \&_load_vmv0},
    vmv0_minimum_ram            => {loader => \&_load_vmv0},
    vmv0_boundary_text          => {loader => \&_load_vmv0},
    vmv0_boundary_load          => {loader => \&_load_vmv0},
    png_ihdr_width              => {loader => \&_load_png},
    png_ihdr_height             => {loader => \&_load_png},
    png_ihdr_bit_depth          => {loader => \&_load_png},
    png_ihdr_color_type         => {loader => \&_load_png},
    png_ihdr_compression_method => {loader => \&_load_png},
    png_ihdr_filter_method      => {loader => \&_load_png},
    png_ihdr_interlace_method   => {loader => \&_load_png},
    gif_screen_width            => {loader => \&_load_gif},
    gif_screen_height           => {loader => \&_load_gif},

lib/File/Information/Deep.pm  view on Meta::CPAN

}
foreach my $keyword (qw(CreationDate ModDate)) {
    $_properties{'pdf_info_'.lc($keyword)}{parsing} = 'pdf_date';
}

foreach my $key (@_odf_medadata_keys) {
    $_properties{'odf_info_'.$key} = {loader => \&_load_odf};
}
foreach my $key (qw(creation_date date)) {
    $_properties{'odf_info_'.$key}{parsing} = 'iso8601';
}

foreach my $key (@_image_info_keys) {
     $_properties{'image_info_'.lc($key)} = {loader => \&_load_image_info};
}
foreach my $key (@_image_extra_keys) {
     $_properties{'image_info_extra_'.lc($key =~ s/::/_/r)} = {loader => \&_load_image_info};
}
$_properties{image_info_extra_thumb_mtime}{rawtype} = 'unixts';
$_properties{image_info_extra_thumb_uri}{rawtype} = 'uri';


# Register well known:
foreach my $value (
    values(%_PNG_colour_types),
    values(%_PNG_filter_method),
    values(%_PNG_compression_method),
    values(%_vmv0_section_types),
    values(%_wk),
) {
    Data::Identifier->new(ise => $value->{ise}, displayname => $value->{displayname})->register;
}


#@returns File::Information::Base
sub parent {
    my ($self) = @_;
    return $self->{parent};
}

# ----------------
sub property_info {
    my ($self, @args) = @_;

    unless (defined $self->{_dynamic}) {
        $self->{_dynamic} = 1;
        foreach my $cb (@_dynamic_loaders) {
            $self->$cb('__dummy__');
        }
    }

    return $self->SUPER::property_info(@args);
}

sub _new {
    my ($pkg, %opts) = @_;
    my $self = $pkg->SUPER::_new(%opts, properties => \%_properties);
    my $pv = ($self->{properties_values} //= {})->{current} //= {};
    my $parent = $self->{parent};

    weaken($self->{parent});

    # copy a few critical values:
    $pv->{contentise} = {raw => $parent->get('contentise', lifecycle => 'current', as => 'uuid')};
    eval { $pv->{mediatype}  = {raw => $parent->get('mediatype',  lifecycle => 'current', as => 'mediatype')} };

    return $self;
}

sub _dynamic_property {
    my ($self, $prefix, $property) = @_;
    my $key;

    $property = lc($property);
    $property =~ s/::/_/g;
    $property =~ s/[^a-z0-9]/_/g;
    $_properties{$key = $prefix.'_'.$property} //= {};

    return $key;
}

sub _check_mediatype {
    my ($self, @mediasubtypes) = @_;
    my $v;

    return undef unless defined $self->{properties_values}{current}{mediatype}{raw};

    $v = $self->{properties_values}{current}{mediatype}{raw};

    foreach my $mediasubtype (@mediasubtypes) {
        return 1 if $v eq $mediasubtype;
    }

    return undef;
}

sub _pdf_extract_date {
    my ($self, $value) = @_;
    require DateTime::Format::Strptime;

    state $pdf_date_core_pattern = '%Y%m%d%H%M%S';
    state $pdf_date_format_0 = DateTime::Format::Strptime->new('pattern' => $pdf_date_core_pattern, 'time_zone' => 'UTC');
    my $dt;
    my $core;
    my $parser;

    # General format: D:YYYYMMDDHHmmSSOHH'mm'

    if (($core) = $value =~ /^D:([0-9]{14})Z'{0,2}$/) {
        $parser = $pdf_date_format_0;
    } elsif (my ($mycore, $tz_dir, $tz_h, $tz_m) = $value =~ /^D:([0-9]{14})(\+|\-)([0-9]{2})'([0-9]{2})'$/) {
        my $tz = sprintf('%s%s%s', $tz_dir, $tz_h, $tz_m);
        $core = $mycore;
        $parser = DateTime::Format::Strptime->new('pattern' => $pdf_date_core_pattern, 'time_zone' => $tz);
    }

    return undef unless defined($core) && defined($parser);

    return $parser->parse_datetime($core);
}



( run in 0.677 second using v1.01-cache-2.11-cpan-39bf76dae61 )