SQL-Type-Guess

 view release on metacpan or  search on metacpan

lib/SQL/Type/Guess.pm  view on Meta::CPAN

    #    "seen" decimal(1,0),
    #    "greeting" varchar(5),
    #    "value" decimal(5,3),
    #    "when" date
    # )

=cut

=head1 METHODS

=head2 C<< SQL:::Type::Guess->new( %OPTIONS ) >>

  my $g= SQL::Type::Guess->new();

Creates a new C<SQL::Type::Guess> instance. The following options are
supported:

=over 4

=item B<column_type>

Hashref of already known column types.

=item B<column_map>

Hashref mapping the combinations SQL type names
to the resulting type name.

=back

=cut

sub new {
    my( $class, %options )= @_;
    
    $options{ column_type } ||= {};
    $options{ column_map } ||= {
        ";date" => 'date',
        ";datetime" => 'datetime',
        ";decimal" => 'decimal(%2$d,%3$d)',
        ";varchar" => 'varchar(%1$d)',
        "date;" => 'date',
        "datetime;" => 'datetime',
        "datetime;datetime" => 'datetime',
        "decimal;" => 'decimal(%2$d,%3$d)',
        "varchar;" => 'varchar(%1$d)',
        "varchar;date" => 'varchar(%1$d)',
        "varchar;datetime" => 'varchar(%1$d)',
        "varchar;decimal" => 'varchar(%1$d)',
        "varchar;varchar" => 'varchar(%1$d)',
        "date;decimal" => 'decimal(%2$d,%3$d)',
        "date;varchar" => 'varchar(%1$d)',
        "date;date" => 'date',
        "datetime;varchar" => 'varchar(%1$d)',
        "decimal;date" => 'decimal(%2$d,%3$d)',
        "decimal;varchar" => 'varchar(%1$d)',
        "decimal;decimal" => 'decimal(%2$d,%3$d)',
        ";" => '',
    };
    
    bless \%options => $class;
}

=head2 C<< $g->column_type >>

    $g->guess({ foo => 1, bar => 'Hello' },{ foo => 1000, bar => 'World' });
    print $g->column_type->{ 'foo' } # decimal(4,0)

Returns a hashref containing the SQL types to store all
values in the columns seen so far.

=cut

sub column_type { $_[0]->{column_type} };

=head2 C<< $g->column_map >>

Returns the hashref used for the type transitions. The current
transitions used for generalizing data are:

  date -> decimal -> varchar

This is not entirely safe, as C<2014-01-01> can't be safely
loaded into an C<decimal> column, but assuming your data is representative
of the data to be stored that shouldn't be much of an issue.

=cut

sub column_map  { $_[0]->{column_map} };

=head2 C<< $g->guess_data_type $OLD_TYPE, @VALUES >>

    $type= $g->guess_data_type( $type, 1,2,3,undef,'Hello','World', );

Returns the data type that encompasses the already established data type in C<$type>
and the new values as passed in via C<@values>.

If there is no preexisting data type, C<$type> can be C<undef> or the empty string.

=cut

our @recognizers = (
    sub { if( ! defined $_[0] or $_[0] eq ''                              ) { return {} }}, # empty value, nothing to change
    sub { if( $_[0] =~ /^((?:19|20)\d\d)-?(0\d|1[012])-?([012]\d|3[01])$/ ) { return { this_value_type => 'date', 'pre' => 8 } }}, # date
    sub { if( $_[0] =~ m!^\s*[01]\d/[0123]\d/(?:19|20)\d\d\s[012]\d:[012345]\d:[0123456]\d(\.\d*)?$! ) { return { this_value_type => 'datetime',        } }}, # US-datetime
    sub { if( $_[0] =~ m!^\s*[0123]\d\.[01]\d\.(?:19|20)\d\d\s[012]\d:[012345]\d:[0123456]\d(\.\d*)?$! ) { return { this_value_type => 'datetime',        } }}, # EU-datetime
    sub { if( $_[0] =~ m!^\s*(?:19|20)\d\d-[01]\d-[0123]\d[\sT][012]\d:[012345]\d:[0123456]\d(\.\d*)?Z?$! ) { return { this_value_type => 'datetime',        } }}, # ISO-datetime
    sub { if( $_[0] =~ m!^\s*[01]\d/[0123]\d/(?:19|20)\d\d$!              ) { return { this_value_type => 'date',            } }}, # US-date
    sub { if( $_[0] =~ m!^\s*[0123]\d\.[01]\d\.(?:19|20)\d\d$!            ) { return { this_value_type => 'date',            } }}, # EU-date
    sub { if( $_[0] =~ /^\s*[+-]?(\d+)\s*$/                               ) { return { this_value_type => 'decimal', 'pre' => length($1), post => 0 } }}, # integer
    sub { if( $_[0] =~ /^\s*[+-]?(\d+)\.(\d+)\s*$/                        ) { return { this_value_type => 'decimal', 'pre' => length($1), post => length($2) } }}, # integer
    sub {                                                                     return { this_value_type => 'varchar', length => length $_[0] }}, # catch-all
);

sub guess_data_type {
    my( $self, $type, @values )= @_;

    my $column_map= $self->column_map;
    for my $value (@values) {
        my $old_type = $type;

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.197 second using v1.00-cache-2.02-grep-82fe00e-cpan-9f2165ba459b )