API-INSEE-Sirene

 view release on metacpan or  search on metacpan

lib/API/INSEE/Sirene.pm  view on Meta::CPAN

        nomUniteLegale changementNomUniteLegale nomUsageUniteLegale changementNomUsageUniteLegale
        denominationUniteLegale changementDenominationUniteLegale denominationUsuelle1UniteLegale
        denominationUsuelle2UniteLegale denominationUsuelle3UniteLegale changementDenominationUsuelleUniteLegale
        categorieJuridiqueUniteLegale changementCategorieJuridiqueUniteLegale activitePrincipaleUniteLegale
        nomenclatureActivitePrincipaleUniteLegale changementActivitePrincipaleUniteLegale nicSiegeUniteLegale
        changementNicSiegeUniteLegale economieSocialeSolidaireUniteLegale changementEconomieSocialeSolidaireUniteLegale
        caractereEmployeurUniteLegale changementCaractereEmployeurUniteLegale
    / ],
    siret => [ qw/
        etatAdministratifEtablissement changementEtatAdministratifEtablissement
        enseigne1Etablissement enseigne2Etablissement enseigne3Etablissement changementEnseigneEtablissement
        denominationUsuelleEtablissement changementDenominationUsuelleEtablissement
        activitePrincipaleEtablissement nomenclatureActivitePrincipaleEtablissement changementActivitePrincipaleEtablissement
        caractereEmployeurEtablissement changementCaractereEmployeurEtablissement
    / ],
};

my $useful_fields_legal_unit = [
    qw/
        siren
        dateCreationUniteLegale
        sigleUniteLegale
        categorieEntreprise
        denominationUniteLegale denominationUsuelle1UniteLegale nomUniteLegale
        categorieJuridiqueUniteLegale
        activitePrincipaleUniteLegale nomenclatureActivitePrincipaleUniteLegale
        nicSiegeUniteLegale
    /
];

my $useful_fields_establishment = [
    qw/
        siren siret
        denominationUsuelleEtablissement denominationUniteLegale denominationUsuelle1UniteLegale nomUniteLegale
        activitePrincipaleUniteLegale
        numeroVoieEtablissement typeVoieEtablissement libelleVoieEtablissement
        codePostalEtablissement libelleCommuneEtablissement
    /
];

my $useful_fields_aliases = {
    nicSiege                        => 'nicSiegeUniteLegale',
    nom                             => [ 'denominationUniteLegale', 'nomUniteLegale' ],
    dateCreation                    => 'dateCreationUniteLegale',
    sigle                           => 'sigleUniteLegale',
    categorieJuridique              => 'categorieJuridiqueUniteLegale',
    nomenclatureActivitePrincipale  => 'nomenclatureActivitePrincipaleUniteLegale',
    activitePrincipale              => 'activitePrincipaleUniteLegale',
    numvoie                         => 'numeroVoieEtablissement',
    typevoie                        => 'typeVoieEtablissement',
    nomvoie                         => 'libelleVoieEtablissement',
    codePostal                      => 'codePostalEtablissement',
    nomCommune                      => 'libelleCommuneEtablissement',
    adresseComplete                 => [
                                        'numeroVoieEtablissement',
                                        'typeVoieEtablissement', 'libelleVoieEtablissement',
                                        'codePostalEtablissement', 'libelleCommuneEtablissement'
                                    ],
};

sub new {
    my $class = shift;
    my ($credentials, $timeout, $max_results, $proxy) = @_;

    my $self = bless {
        credentials      => $credentials,
        user_agent       => undef,
        token_expiration => undef,
        max_results      => undef,
        debug_mode       => 0,
        current_endpoint => undef,
    }, $class;

    $self->_initUserAgent();
    $self->setProxy($proxy);
    $self->setMaxResults($max_results);
    $self->setTimeout($timeout);

    return $self;
}

sub setCredentials {
    my ($self, $credentials) = @_;

    $self->{'credentials'} = $credentials;
}

sub setMaxResults {
    my ($self, $max_results) = @_;

    $max_results //= DEFAULT_MAX_RESULTS;
    $self->{'max_results'} = $max_results > HARD_MAX_RESULTS ? HARD_MAX_RESULTS : $max_results;
}

sub setDebugMode {
    my ($self, $debug_value) = @_;

    $self->{'debug_mode'} = $debug_value;
}

sub setProxy {
    my ($self, $proxy) = @_;

    defined $proxy ? $self->{'user_agent'}->proxy([ 'http', 'https' ], $proxy) : $self->{'user_agent'}->env_proxy;
}

sub setTimeout {
    my ($self, $timeout) = @_;

    $timeout //= DEFAULT_TIMEOUT;
    $self->{'user_agent'}->timeout($timeout);
}

sub setCurrentEndpoint {
    my ($self, $endpoint) = @_;

    $self->{'current_endpoint'} = $endpoint;
}

sub _dumpRequest {
    my ($self, $request, $response) = @_;

    my $dump = sprintf "Sent request:\n%s\n", $request->as_string;
    $dump .= sprintf "Received response:\n%s\n", $response->as_string if defined $response;

    return $dump;
}

sub _initUserAgent {
    my $self = shift;

    $self->{'user_agent'} = LWP::UserAgent->new(protocols_allowed => [ 'http', 'https' ]);

    $self->{'user_agent'}->agent("Perl API::INSEE::Sirene V$VERSION");
    $self->{'user_agent'}->default_header('Accept' => 'application/json');
}

sub _getToken {
    my $self = shift;

    croak 'Please provide your credentials.' if !defined $self->{'credentials'};

    my $request = POST API_AUTH_URL,
        Authorization => "Basic $self->{'credentials'}",
        Content       => [ grant_type => 'client_credentials' ];

    my $response = $self->{'user_agent'}->request($request);
    my $json_obj;

    if ($response->content_type =~ m/^application\/json/) {
        $json_obj = decode_json($response->content);
    }
    else {
        return 1, $self->_dumpRequest($request, $response); # the API may return xml intead of json...
    }

    switch ($response->code) {
        case HTTP_OK {
            $self->{'token_expiration'} = time + $json_obj->{'expires_in'};
            $self->{'user_agent'}->default_header( Authorization => "Bearer $json_obj->{'access_token'}" );
            return 0;
        }
        case HTTP_UNAUTHORIZED { # wrong credentials
            return 1 , $json_obj->{'error_description'};
        }
        else { # oh dear we are in trouble
            return 1, $self->_dumpRequest($request, $response);
        }
    }
}

sub _sendRequest {
    my ($self, $parameters) = @_;

    my $request;
    if (!exists $parameters->{'q'}) {
        my @url_parameters;

        foreach my $key (keys %{ $parameters }) {
            push @url_parameters, join '=', $key, $parameters->{$key};
        }

        my $endpoint = join '?', $self->{'current_endpoint'}, join '&', @url_parameters;
        $request = GET join '/', API_BASE_URL, $endpoint;
    }
    else {
        $request = POST join('/', API_BASE_URL, $self->{'current_endpoint'}),
            Content => [ %{ $parameters } ];
    }

    if ($self->{'debug_mode'}) { # Requests will not be sent in debug mode
        return 0, $self->_dumpRequest($request);
    }

    if (!defined $self->{'token_expiration'} || $self->{'token_expiration'} < time) {
        my ($err, $msg) = $self->_getToken();
        croak $msg if $err;
    }

    my $response = $self->{'user_agent'}->request($request);

    switch ($response->code) {
        case [ HTTP_OK, HTTP_NOT_FOUND ] {
            return 0, $response->content;
        }
        case HTTP_MOVED_PERMANENTLY { # duplicated legal unit/ establishment
            return 1, sprintf "%s\n%s", $response->message, $response->header('Location');
        }
        case [
            HTTP_REQUEST_URI_TOO_LARGE, HTTP_TOO_MANY_REQUESTS,
            HTTP_UNAUTHORIZED, HTTP_FORBIDDEN,
            HTTP_SERVICE_UNAVAILABLE
        ] {
# There is no syntax error in request, the http message should be sufficient to understand the problem
            return 1, $response->message;
        }
        else { # case HTTP_BAD_REQUEST || HTTP_INTERNAL_SERVER_ERROR
            return 1, $self->_dumpRequest($request, $response);
        }
    }
}

sub _buildParameters {
    my ($self, $usefull_fields, $desired_fields, $criteria) = @_;

# Parameters names come from the documentation
    my $parameters = {
        date   => strftime('%Y-%m-%d', localtime),
        nombre => $self->{'max_results'},
    };
    $parameters->{'champs'} = $self->_buildFields($usefull_fields, $desired_fields) if (defined $desired_fields && $desired_fields ne 'all');
    $parameters->{'q'}      = sprintf('(%s)', $criteria) if defined $criteria;

    return $parameters;
}

sub _buildFields {
    my ($self, $usefull_fields, $desired_fields) = @_;

    if (defined $desired_fields) {
        return $self->_mapAliases($desired_fields);
    }
    else {
        return join ',', @{ $usefull_fields };
    }
}

sub _mapAliases {
    my ($self, $desired_fields) = @_;

    my @desired_fields = ref $desired_fields eq 'ARRAY' ? @{ $desired_fields } : $desired_fields;

    foreach my $desired_field (@desired_fields) {
        if (exists $useful_fields_aliases->{$desired_field}) {
            if (ref $useful_fields_aliases->{$desired_field} eq 'ARRAY') {
                $desired_field = join ',', @{ $useful_fields_aliases->{$desired_field} };
            }
            else {
                $desired_field = $useful_fields_aliases->{$desired_field};
            }
        }
    }

    return join ',', @desired_fields;
}

sub getCustomCriteria {
    my ($self, $field_name, $value, $search_mode) = @_;

    croak 'No endpoint specified.' if !defined $self->{'current_endpoint'};

    $search_mode //= 'aproximate';
    if (exists $useful_fields_aliases->{$field_name}) {
        if (ref $useful_fields_aliases->{$field_name} eq 'ARRAY') {
            croak "Can't use the alias $field_name in custom criteria";
        }
        $field_name = $useful_fields_aliases->{$field_name};
    }

    if ($search_mode eq 'aproximate') {
        my @criteria;
        my @words = split /[ \/-]/, $value;

        foreach my $word (@words) {
            $word =~ s/&/%26/ig;
            $word = sprintf '(%s:"%s"~ OR %s:*%s*)', $field_name, $word, $field_name, $word;
            $word = "periode$word" if any { $_ eq $field_name } @{ $historized_fields->{$self->{'current_endpoint'}} };


            push @criteria, $word;
        }

        return join ' AND ', @criteria;
    }

    my $criteria;
    $value =~ s/&/%26/ig;

    if ($search_mode eq 'exact') {
        $criteria = sprintf '%s:%s', $field_name, $value;
    }
    elsif ($search_mode eq 'begin') {
        $criteria = sprintf '%s:%s*', $field_name, $value;
    }

    $criteria = "periode($criteria)" if any { $_ eq $field_name } @{ $historized_fields->{$self->{'current_endpoint'}} };

    return $criteria;
}

sub searchByCustomCriteria {
    my ($self, $criteria, $desired_fields) = @_;

    my $parameters;
    switch ($self->{'current_endpoint'}) {
        case 'siren' { $parameters = $self->_buildParameters($useful_fields_legal_unit, $desired_fields, $criteria) }
        case 'siret' { $parameters = $self->_buildParameters($useful_fields_establishment, $desired_fields, $criteria) }
        else { croak 'Bad endpoint specified.' }
    }

    return $self->_sendRequest($parameters);
}

sub getLegalUnitBySIREN {
    my ($self, $siren_number, $desired_fields) = @_;

    return 1, "Invalid SIREN $siren_number -> Must be a ${ \MAX_SIREN_LENGTH } digits number."
        if $siren_number !~ m/^\d{${ \MAX_SIREN_LENGTH }}$/;

    $self->setCurrentEndpoint("siren/$siren_number");
    my $parameters = $self->_buildParameters($useful_fields_legal_unit, $desired_fields);

    return $self->_sendRequest($parameters);
}

sub searchLegalUnitBySIREN {
    my ($self, $siren_number, $desired_fields) = @_;

    return 1, "Invalid SIREN $siren_number -> Must be a ${ \MIN_LENGTH } digits min and ${ \MAX_SIREN_LENGTH } digits number max."
        if $siren_number !~ m/^\d{${ \MIN_LENGTH },${ \MAX_SIREN_LENGTH }}$/;

    $self->setCurrentEndpoint('siren');
    my $criteria = $self->getCustomCriteria('siren', $siren_number, 'begin');

    return $self->searchByCustomCriteria($criteria, $desired_fields);
}

sub getEstablishmentBySIRET {
    my ($self, $siret_number, $desired_fields) = @_;

    return 1, "Invalid SIRET $siret_number -> Must be a ${ \MAX_SIRET_LENGTH } digits number."
        if $siret_number !~ m/^\d{${ \MAX_SIRET_LENGTH }}$/;

    $self->setCurrentEndpoint("siret/$siret_number");
    my $parameters = $self->_buildParameters($useful_fields_establishment, $desired_fields);

    return $self->_sendRequest($parameters);
}

sub getEstablishmentsBySIREN {
    my ($self, $siren_number, $desired_fields) = @_;

    return (1, "Invalid SIREN $siren_number -> Must be a ${ \MAX_SIREN_LENGTH } digits number.")
        if $siren_number !~ m/^\d{${ \MAX_SIREN_LENGTH }}$/;

    $self->setCurrentEndpoint('siret');
    my $criteria = $self->getCustomCriteria('siren', $siren_number);

    return $self->searchByCustomCriteria($criteria, $desired_fields);
}

sub searchEstablishmentBySIRET {
    my ($self, $siret_number, $desired_fields) = @_;

    return 1, "Invalid SIRET $siret_number -> Must be a ${ \MIN_LENGTH } digits min and a ${ \MAX_SIRET_LENGTH } digits number max."
        if $siret_number !~ m/^\d{${ \MIN_LENGTH },${ \MAX_SIRET_LENGTH }}$/;

    $self->setCurrentEndpoint('siret');
    my $criteria = $self->getCustomCriteria('siret', $siret_number);

    return $self->searchByCustomCriteria($criteria, $desired_fields);
}

sub getLegalUnitsByName {
    my ($self, $name, $desired_fields) = @_;

    $self->setCurrentEndpoint('siren');
    my $criteria = $self->getCustomCriteria('denominationUniteLegale', $name);

    return $self->searchByCustomCriteria($criteria, $desired_fields);
}

sub getEstablishmentsByName {
    my ($self, $name, $desired_fields) = @_;

    $self->setCurrentEndpoint('siret');
    my $criteria = $self->getCustomCriteria('denominationUniteLegale', $name);

    return $self->searchByCustomCriteria($criteria, $desired_fields);
}

sub getLegalUnitsByUsualName {
    my ($self, $name, $desired_fields) = @_;


    $self->setCurrentEndpoint('siren');
    my $criteria = $self->getCustomCriteria('denominationUsuelle1UniteLegale', $name);

    return $self->searchByCustomCriteria($criteria, $desired_fields);
}

sub getEstablishmentsByUsualName {
    my ($self, $name, $desired_fields) = @_;

    $self->setCurrentEndpoint('siret');
    my $criteria = $self->getCustomCriteria('denominationUsuelle1UniteLegale', $name);

    return $self->searchByCustomCriteria($criteria, $desired_fields);
}

1;

__END__

=pod

=encoding utf8

=head1 NAME

API::INSEE::Sirene - An interface for the Sirene API of INSEE

=head1 VERSION

Version 4.04

=head1 SYNOPSIS

  use API::INSEE::Sirene;

  my $sirene = API::INSEE::Sirene->new('Y29uc3VtZXIta2V5OmNvbnN1bWVyLXNlY3JldA=='); # your base64 encoded credentials
  $sirene->setMaxResults(30);

  # Examples to get information about an establishment with SIRET number '12345678987654'
  $sirene->getEstablishmentBySIRET(12345678987654, 'all');

  # or
  my $fields_that_interest_me = ['numeroVoieEtablissement', 'typeVoieEtablissement', 'libelleVoieEtablissement',
                                 'codePostalEtablissement', 'libelleCommuneEtablissement'];
  $sirene->getEstablishmentBySIRET(12345678987654, $fields_that_interest_me);

  # or
  $sirene->getEstablishmentBySIRET(12345678987654, 'denominationUniteLegale');

  # or simply
  $sirene->getEstablishmentBySIRET(12345678987654);

  # you can also perform searches whith a partial SIREN/SIRET number using search functions:
  $sirene->searchEstablishmentBySIRET(1234567898);
  $sirene->searchLegalUnitBySIREN(123456);

=head1 DESCRIPTION

This module allows you to interact with the Sirene API of INSEE (Institut National de la Statistique et des Études Économiques) in France.

It contains a set of functions that can perform searches on INSEE's database to get some information about french companies like their SIREN number, company name, company headquarters address, etc.

The terms "enterprise", "legal unit" and "establishment" used in this documentation are defined at the INSEE website in the following pages:

=over 4

=item * B<Enterprise definition:>

L<< https://www.insee.fr/en/metadonnees/definition/c1496 >>

=item * B<Legal unit definition:>

L<< https://www.insee.fr/en/metadonnees/definition/c1044 >>

=item * B<Establishment definition:>

L<< https://www.insee.fr/en/metadonnees/definition/c1377 >>

=back

Here is the documentation with among others all fields names:

=over 4

=item *

L<< https://api.insee.fr/catalogue/site/themes/wso2/subthemes/insee/pages/item-info.jag?name=Sirene&version=V3&provider=insee >>

=back

B<Please note that this API is french so all fields names used in function calls are in french, including the aliases.>

This module has been tested with 3.9 INSEE API version.

=head1 DEPENDENCIES

=over 4

=item * L<< Carp|https://perldoc.perl.org/Carp >>

=item * L<< JSON|https://metacpan.org/pod/JSON >>

=item * L<< List::Util|https://perldoc.perl.org/List::Util >>

=item * L<< HTTP::Request::Common|https://metacpan.org/pod/HTTP::Request::Common >>

=item * L<< HTTP::Status|https://metacpan.org/pod/HTTP::Status >> B<< version < 6.26 >>

=item * L<< LWP::UserAgent|https://metacpan.org/pod/LWP::UserAgent >>

=item * L<< POSIX::strftime|https://metacpan.org/pod/POSIX#strftime >>

=item * L<< Switch|https://metacpan.org/pod/Switch >>

=back

=head1 CONSTANTS

=head2 DEFAULT_MAX_RESULTS

The API's default number of results for each request. You can override it with the C<< setMaxResults >> method. A too big value may impact response time and general performances.

This constant is set to 20 results.

=head2 DEFAULT_TIMEOUT

This constant specifies how many seconds the client module has to wait for server response before giving up. You can override it with the C<< setTimeout >> method.

This constant is set to 20 seconds.

=head2 HARD_MAX_RESULTS

The maximum number of results that you can get. This value can't be increased (restricted by API). If you try to send a request with a higher value, the C<nombre> parameter will be forced to HARD_MAX_RESULTS value.

This constant is set to 1000 results.

=head2 MAX_SIREN_LENGTH

A SIREN number has a maximum length of 9 digits.

=head2 MAX_SIRET_LENGTH

A SIREN number has a maximum length of 14 digits.

=head2 MIN_LENGTH

In order to avoid useless requests with too short SIREN/SIRET numbers, the module requires at least 3 digits to allow you performing a search.



( run in 0.446 second using v1.01-cache-2.11-cpan-13bb782fe5a )