OCR-OcrSpace

 view release on metacpan or  search on metacpan

lib/OCR/OcrSpace.pm  view on Meta::CPAN

- Western Latin Character languages only (English, German, French,...)

- Language auto-detect (so it does not really matter what OCR language you select, as long as it uses Latin characters)

- Usually better at single number OCR and alphanumeric OCR (e.g. Sudoku, Dot Matrix OCR, MRZ OCR, ...)

- Usually better at special characters OCR like @+-...

- Image size limit 5000px width and 5000px height

- Parameter: OCREngine=2

- No PDF OCR or offline OCR yet. If you need this, please contact us for an internal beta.

The returned OCR result JSON response is identical for both engines! So you can easily switch between both engines as needed. If you have any question about using Engine 1 or 2, please ask in our OCR API Forum.

 
=cut

sub get_result {

    # Usable both as a method ( $obj->get_result( $params ) ) and as a plain
    # function ( get_result( $params ) ). The invocant is kept, rather than
    # simply discarded, so later maintenance can make use of it.
    my $result;

    if ( @_ > 1 ) {

        # Method call: invocant first, options hashref second.
        my ( $self, $params ) = @_;

        # validate the options, build the request, send it via the gateway
        my $checked = $self->_validate( $params );
        my $request = $self->_generate_request( $checked );
        $result = $self->_process_request( $request );
    } else {

        # Function call: the only argument is the options hashref.
        my $params = shift;

        # same three steps, without an invocant
        my $checked = _validate( $params );
        my $request = _generate_request( $checked );
        $result = _process_request( $request );
    }

    # Whatever the gateway step produced (possibly undef).
    return $result;

}

=head2 Sample Output success

    {"ParsedResults":[{"TextOverlay":{"Lines":[{"LineText":"Current","Words":[{"WordText":"Current","Left":11.666666030883789,"Top":59.166664123535156,"Height":14.999999046325684,"Width":54.999996185302734}],"MaxHeight":14.999999046325684,"MinTop":59...

=head2 Sample Output error

    {"OCRExitCode":99,"IsErroredOnProcessing":true,"ErrorMessage":["Parameter name 'attributes' is invalid. Valid parameters: apikey,url,language,isoverlayrequired,base64image,iscreatesearchablepdf,issearchablepdfhidetextlayer,filetype,addressparsing...

=cut

####################
# internal function
###################
sub _generate_request {

    # Called either as a method (options are the second argument) or as a
    # plain function (options are the first argument).
    my $validated = @_ > 1 ? $_[1] : $_[0];

    # Map the validated options onto the keys of the request hash.
    my %request = (
        url        => $validated->{endpoint},
        body_param => $validated->{body_param},
    );

    # A file path is only included when one was supplied.
    if ( defined $validated->{file} ) {
        $request{file_path} = $validated->{file};
    }

    return \%request;
}

####################
# internal function
###################
# Check the caller-supplied options and build the normalised parameter set.
#
# Callable as a method ( $self->_validate( $params ) ) or as a plain function
# ( _validate( $params ) ): when more than one argument is passed, the second
# one is taken as the options hashref.
#
# Returns a hashref containing:
#   endpoint    - $params->{ocr_space_url}, or $BASE_URL when that is undef
#   url         - copied when defined
#   base64Image - copied when defined
#   file        - copied only when it names an existing plain file
#   body_param  - hashref of the recognised API options that are defined
#                 (the key is absent when none of them are)
#
# Problems are reported through carp, i.e. as warnings; nothing is thrown.
sub _validate {
    my $params = ( scalar( @_ ) > 1 ) ? $_[1] : shift;

    carp "Required parameter `apikey` not passed" unless ( defined $params->{apikey} );

    # Defined-or picks the first *defined* source. With `||` a
    # defined-but-false value (e.g. "" or 0) would be skipped, and the
    # warning would depend on which of the three keys held it, even though
    # the copies below treat any defined value as passed.
    carp "Required parameter `url or file or base64Image` not passed"
      unless ( defined( $params->{url} // $params->{file} // $params->{base64Image} ) );

    my $valid_params = { endpoint => $params->{ocr_space_url} // $BASE_URL, };
    $valid_params->{url}         = $params->{url}         if ( defined $params->{url} );
    $valid_params->{base64Image} = $params->{base64Image} if ( defined $params->{base64Image} );

    # A file is only accepted when it exists as a plain file; otherwise warn
    # and leave it out of the result.
    if ( defined $params->{file} ) {
        if ( -f $params->{file} ) {
            $valid_params->{file} = $params->{file};
        } else {
            carp "Unable to open file $params->{file} \n";
        }
    }

    #add optional keys
    foreach my $key (
        qw/
        language                      isOverlayRequired       filetype
        detectOrientation             isCreateSearchablePdf   url
        isSearchablePdfHideTextLayer  scale                   base64Image
        isTable                       OCREngine               apikey/
      )
    {
        $valid_params->{body_param}->{$key} = $params->{$key} if ( defined $params->{$key} );
    }
    return $valid_params;
}

####################
# internal function
###################
sub _process_request {
    my $params = ( scalar( @_ ) > 1 ) ? $_[1] : shift;

    my $file     = $params->{file_path};



( run in 1.234 second using v1.01-cache-2.11-cpan-e1769b4cff6 )