Module-Generic

 view release on metacpan or  search on metacpan

lib/Module/Generic/File/Magic.pm  view on Meta::CPAN

        return( $self->error( "Cannot read $path: $!" ) );
    my $buf = '';
    read( $fh, $buf, $max );
    close( $fh );
    return( $buf );
}

# _run_json_detection( $buf ) -> mime_type | undef
# Walks the JSON magic database returned by _load_json_db and hands each of an
# entry's "matches" rules, together with $buf, to _match_bytes().
# Entries are visited in the order the database lists them, so the first entry
# owning a matching rule wins and its "mime" value is returned.
# Returns undef when the database could not be loaded or when nothing matched.
sub _run_json_detection
{
    my( $self, $buf ) = @_;

    my $entries = $self->_load_json_db;
    defined( $entries ) || return( undef );

    for my $record ( @$entries )
    {
        for my $rule ( @{$record->{matches}} )
        {
            # Keep scanning until one rule of this entry accepts the buffer.
            next unless( _match_bytes( $buf, $rule ) );
            return( $record->{mime} );
        }
    }

    # No rule in the whole database matched.
    return( undef );
}

# _with_flags( $flags, $method, @args )
# Runs $self->$method( @args ) with $self->{flags} temporarily set to $flags,
# then puts the previous flags back.
#
# On the xs backend, $self->close is called both before the override and
# before the restore.
# NOTE(review): presumably this forces the libmagic cookie to be rebuilt with
# the flags currently in effect -- confirm against close().
#
# The method is invoked in scalar context. Its value is returned when defined;
# otherwise the value of $self->pass_error is returned.
# If the method dies, the original state is restored first and the very same
# exception is then re-thrown, so the temporary flags never outlive this call.
sub _with_flags
{
    my $self   = shift( @_ );
    my $flags  = shift( @_ );
    my $method = shift( @_ );

    my $orig_flags = $self->{flags};
    $self->close if( $BACKEND eq 'xs' );
    $self->{flags} = $flags;

    # Trap any exception instead of letting it skip the restore step below.
    my( $result, $ok, $err );
    {
        # Localised so that the caller's own $@ is left untouched.
        local $@;
        $ok  = eval{ $result = $self->$method( @_ ); 1 };
        $err = $@;
    }

    $self->close if( $BACKEND eq 'xs' );
    $self->{flags} = $orig_flags;

    # Re-throw only once the object is back in its original state.
    die( $err ) unless( $ok );

    return( $self->pass_error ) unless( defined( $result ) );
    return( $result );
}

1;
# NOTE: POD
__END__

=encoding utf-8

=head1 NAME

Module::Generic::File::Magic - File type and MIME detection with 3-level backend cascade

=head1 SYNOPSIS

    use Module::Generic::File::Magic qw( :flags );

    my $magic = Module::Generic::File::Magic->new( flags => MAGIC_MIME_TYPE ) ||
        die( Module::Generic::File::Magic->error );

    # Which backend is active?
    printf "Backend: %s\n", $magic->backend;    # xs, json, or file

    # Detect from a file path
    my $mime = $magic->from_file( '/path/to/archive.tar.gz' ) ||
        die( $magic->error );
    # -> "application/gzip"

    # Detect from an in-memory buffer
    open( my $fh, '<:raw', '/path/to/file' ) or die( $! );
    read( $fh, my $buf, 4096 );
    close( $fh );
    $mime = $magic->from_buffer( $buf ) || die( $magic->error );

    # Detect from an open filehandle
    open( $fh, '<:raw', '/path/to/file' ) or die( $! );
    $mime = $magic->from_filehandle( $fh ) || die( $magic->error );

    # Convenience wrappers
    my $type = $magic->mime_type_from_file( '/path/to/file' );
    my $enc  = $magic->mime_encoding_from_file( '/path/to/file' );
    my $full = $magic->mime_from_file( '/path/to/file' );
    # -> "application/gzip; charset=binary"

    # Control the read size for pure-Perl backends (default: 512 bytes)
    my $magic2 = Module::Generic::File::Magic->new(
        flags    => MAGIC_MIME_TYPE,
        max_read => 1024,
    ) || die( Module::Generic::File::Magic->error );

    # Change max_read at any time
    $magic->max_read(1024);

    # Procedural interface
    use Module::Generic::File::Magic qw( :functions );
    my $mime = magic_mime_type( '/path/to/file' );

=head1 VERSION

    v0.2.0

=head1 DESCRIPTION

C<Module::Generic::File::Magic> detects file types and MIME types using a three-level cascade, automatically selecting the best available backend:

=over 4

=item B<Level 1 - xs> (preferred)

C<libmagic.so.1> is loaded at runtime via C<dlopen(3)>, so neither C<magic.h> nor the C<libmagic-dev> package is required at build time. This level provides full libmagic accuracy and performance. C<MAGIC_COMPRESS>, C<MAGIC_SYMLINK>, and all other flags
are fully supported. C<compile()>, C<check()>, and C<list()> are only available at this level.

=item B<Level 2 - json>

C<libmagic> is absent. The module loads C<lib/Module/Generic/File/magic.json> (generated from the freedesktop.org shared-mime-info database, bundled with the distribution) and runs pure-Perl byte-pattern matching. Covers ~500 MIME types with magic si...

=item B<Level 3 - file>

Used when no pattern matched at level 2: C<file(1)> is invoked in a subprocess as a last resort. At this level, C<from_buffer> writes a temporary file via L<File::Temp>.

=back

The active backend is available via C<< $magic->backend >> and the package variable C<$Module::Generic::File::Magic::BACKEND>.

Note that within the json backend, a text-content heuristic is applied before falling through to C<file(1)>.

=head1 CONSTRUCTOR

=head2 new

    my $magic = Module::Generic::File::Magic->new( %opts ) ||
        die( Module::Generic::File::Magic->error );

=over 4

=item * C<flags> - integer bitmask (default: C<MAGIC_NONE>)

=item * C<magic_db> - path to a custom C<.mgc> database (xs backend only)

=item * C<max_read> - maximum bytes read from a file for pure-Perl backends (default: C<512>)

=back

=head1 METHODS

=head2 backend

Returns the name of the active backend: C<"xs">, C<"json">, or C<"file">. Note that the reported value is the I<top-level> configured backend; the actual detection at runtime may cascade through multiple levels.

=head2 check( [ $filename ] )

Validates a magic database. B<xs backend only.>

=head2 close

Releases the C<magic_t> cookie. No-op on non-xs backends.

=head2 compile( $filename )

Compiles a magic source file into a C<.mgc> database. B<xs backend only.>

=head2 flags

Getter/setter for the libmagic flags bitmask.

=head2 from_buffer( $scalar )

Detects type from a raw byte scalar.

=head2 from_file( $path )

Detects type from a file path.

=head2 from_filehandle( $fh )

Detects type from an open filehandle.

=head2 list( [ $filename ] )

Prints magic database entries to stdout. B<xs backend only.>

=head2 magic_db

Getter/setter for the custom magic database path (xs backend only).

=head2 max_read( [ $bytes ] )

Getter/setter for the maximum number of bytes read from a file when using pure-Perl backends. The default is 512 bytes, which covers all signatures in the bundled JSON database. Increase this value for formats whose signatures appear at large offsets...

=head2 mime_encoding_from_buffer( $scalar )

Returns the charset (e.g. C<binary>) for the given buffer.

=head2 mime_encoding_from_file( $path )

Returns the charset (e.g. C<us-ascii>) for the given file.

=head2 mime_encoding_from_filehandle( $fh )

Returns the charset for the given filehandle.

=head2 mime_from_buffer( $scalar )

Returns the full MIME string (e.g. C<application/gzip; charset=binary>) for the given buffer.

=head2 mime_from_file( $path )

Returns the full MIME string for the given file.



( run in 2.087 seconds using v1.01-cache-2.11-cpan-97f6503c9c8 )