Locale-Unicode-Data

 view release on metacpan or  search on metacpan

scripts/create_database.pl  view on Meta::CPAN

        calendar_available_formats => "INSERT INTO calendar_available_formats (locale, calendar, format_id, format_pattern, count, alt) VALUES(?, ?, ?, ?, ?, ?)",
        calendar_cyclics_l10n => "INSERT INTO calendar_cyclics_l10n (locale, calendar, format_set, format_type, format_length, format_id, format_pattern) VALUES(?, ?, ?, ?, ?, ?, ?)",
        calendar_datetime_formats => "INSERT INTO calendar_datetime_formats (locale, calendar, format_length, format_type, format_pattern) VALUES(?, ?, ?, ?, ?)",
        calendar_eras => "INSERT INTO calendar_eras (calendar, sequence, code, aliases, start, until) VALUES(?, ?, ?, ?, ?, ?)",
        calendar_eras_l10n => "INSERT INTO calendar_eras_l10n (locale, calendar, era_width, era_id, alt, locale_name) VALUES(?, ?, ?, ?, ?, ?)",
        calendar_formats_l10n => "INSERT INTO calendar_formats_l10n (locale, calendar, format_type, format_length, alt, format_id, format_pattern) VALUES(?, ?, ?, ?, ?, ?, ?)",
        calendar_interval_formats => "INSERT INTO calendar_interval_formats (locale, calendar, format_id, greatest_diff_id, format_pattern, alt, part1, separator, part2, repeating_field) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        calendar_terms => "INSERT INTO calendar_terms (locale, calendar, term_type, term_context, term_width, alt, yeartype, term_name, term_value) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)",
        calendars => "INSERT INTO calendars (calendar, system, inherits, description) VALUES(?, ?, ?, ?)",
        calendars_l10n => "INSERT INTO calendars_l10n (locale, calendar, locale_name) VALUES(?, ?, ?)",
        casings => "INSERT INTO casings (locale, token, value) VALUES(?, ?, ?)",
        collations_l10n => "INSERT INTO collations_l10n (locale, collation, locale_name) VALUES(?, ?, ?)",
        code_mappings => "INSERT INTO code_mappings (code, alpha3, numeric, fips10, type) VALUES(?, ?, ?, ?, ?)",
        currencies => "INSERT INTO currencies (currency, digits, rounding, cash_digits, cash_rounding, is_obsolete, status) VALUES(?, ?, ?, ?, ?, ?, ?)",
        currencies_info => "INSERT INTO currencies_info (territory, currency, start, until, is_tender, hist_sequence, is_obsolete) VALUES(?, ?, ?, ?, ?, ?, ?)",
        currencies_l10n => "INSERT INTO currencies_l10n (locale, currency, count, locale_name, symbol) VALUES(?, ?, ?, ?, ?)",
        date_fields_l10n => "INSERT INTO date_fields_l10n (locale, field_type, field_length, relative, locale_name) VALUES(?, ?, ?, ?, ?)",
        date_terms => "INSERT INTO date_terms (locale, term_type, term_length, display_name) VALUES(?, ?, ?, ?)",
        day_periods => "INSERT INTO day_periods (locale, day_period, start, until) VALUES(?, ?, ?, ?)",
        language_population => "INSERT INTO language_population (territory, locale, population_percent, literacy_percent, writing_percent, official_status) VALUES(?, ?, ?, ?, ?, ?)",
        languages => "INSERT OR IGNORE INTO languages (language, scripts, territories, parent, alt, status) VALUES(?, ?, ?, ?, ?, ?)",
        languages_match => "INSERT INTO languages_match (desired, supported, distance, is_symetric, is_regexp, sequence) VALUES(?, ?, ?, ?, ?, ?)",
        likely_subtags => "INSERT INTO likely_subtags (locale, target) VALUES(?, ?)",
        locales => "INSERT INTO locales (locale, parent, collations, status) VALUES(?, ?, ?, ?)",
        locales_info => "INSERT INTO locales_info (locale, property, value) VALUES(?, ?, ?)",
        locales_l10n => "INSERT INTO locales_l10n (locale, locale_id, locale_name, alt) VALUES(?, ?, ?, ?)",
        locale_number_systems => "INSERT INTO locale_number_systems (locale, number_system, native, traditional, finance) VALUES(?, ?, ?, ?, ?)",
        metainfos => "INSERT INTO metainfos (property, value) VALUES(?, ?)",
        metazones => "INSERT INTO metazones (metazone, territories, timezones) VALUES(?, ?, ?)",
        metazones_names => "INSERT INTO metazones_names (locale, metazone, width, generic, standard, daylight) VALUES(?, ?, ?, ?, ?, ?)",
        number_formats_l10n => "INSERT INTO number_formats_l10n (locale, number_system, number_type, format_length, format_type, format_id, format_pattern, alt, count) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)",
        number_symbols_l10n => "INSERT INTO number_symbols_l10n (locale, number_system, property, value, alt) VALUES(?, ?, ?, ?, ?)",
        number_systems => "INSERT INTO number_systems (number_system, digits, type) VALUES(?, ?, ?)",
        number_systems_l10n => "INSERT INTO number_systems_l10n (locale, number_system, locale_name, alt) VALUES(?, ?, ?, ?)",
        person_name_defaults => "INSERT INTO person_name_defaults (locale, value) VALUES(?, ?)",
        plural_ranges => "INSERT INTO plural_ranges (locale, aliases, start, stop, result) VALUES(?, ?, ?, ?, ?)",
        plural_rules => "INSERT INTO plural_rules (locale, aliases, count, rule) VALUES(?, ?, ?, ?)",
        rbnf => "INSERT INTO rbnf (locale, grouping, ruleset, rule_id, rule_value) VALUES(?, ?, ?, ?, ?)",
        refs => "INSERT INTO refs (code, uri, description) VALUES(?, ?, ?)",
        regions => "INSERT OR IGNORE INTO territories (territory, contains, status) VALUES(?, ?, ?)",
        scripts => "INSERT INTO scripts (script, rank, sample_char, id_usage, rtl, lb_letters, has_case, shaping_req, ime, density, origin_country, likely_language, status) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        scripts_l10n => "INSERT INTO scripts_l10n (locale, script, locale_name, alt) VALUES(?, ?, ?, ?)",
        subdivisions => "INSERT INTO subdivisions (territory, subdivision, parent, is_top_level, status) VALUES(?, ?, ?, ?, ?)",
        subdivisions_l10n => "INSERT INTO subdivisions_l10n (locale, subdivision, locale_name) VALUES(?, ?, ?)",
        territories => "INSERT INTO territories (territory, parent, gdp, literacy_percent, population, languages, contains, currency, calendars, min_days, first_day, weekend, status) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        territories_l10n => "INSERT INTO territories_l10n (locale, territory, locale_name, alt) VALUES(?, ?, ?, ?)",
        time_formats => "INSERT INTO time_formats (region, territory, locale, time_format, time_allowed) VALUES(?, ?, ?, ?, ?)",
        time_relative_l10n => "INSERT INTO time_relative_l10n (locale, field_type, field_length, relative, format_pattern, count) VALUES(?, ?, ?, ?, ?, ?)",
        timezones => "INSERT INTO timezones (timezone, territory, region, tzid, metazone, tz_bcpid, is_golden, is_primary, is_preferred, is_canonical,  alias) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        timezones_cities => "INSERT INTO timezones_cities (locale, timezone, city, alt) VALUES(?, ?, ?, ?)",
        # This is defined in the extend_timezones_cities() function
        timezones_cities_supplemental => undef,
        timezones_formats => "INSERT INTO timezones_formats (locale, type, subtype, format_pattern) VALUES(?, ?, ?, ?)",
        timezones_info => "INSERT INTO timezones_info (timezone, metazone, start, until) VALUES(?, ?, ?, ?)",
        timezones_names => "INSERT INTO timezones_names (locale, timezone, width, generic, standard, daylight) VALUES(?, ?, ?, ?, ?, ?)",
        unit_aliases => "INSERT INTO unit_aliases (alias, target, reason) VALUES(?, ?, ?)",
        unit_constants => "INSERT INTO unit_constants (constant, expression, value, description, status) VALUES(?, ?, ?, ?, ?)",
        unit_conversions => "INSERT INTO unit_conversions (source, base_unit, expression, factor, systems, category) VALUES(?, ?, ?, ?, ?, ?)",
        units_l10n => "INSERT INTO units_l10n (locale, format_length, unit_type, unit_id, unit_pattern, pattern_type, locale_name, count, gender, gram_case) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        unit_prefixes => "INSERT INTO unit_prefixes (unit_id, symbol, power, factor) VALUES(?, ?, ?, ?)",
        unit_prefs => "INSERT INTO unit_prefs (unit_id, territory, category, usage, geq, skeleton) VALUES(?, ?, ?, ?, ?, ?)",
        unit_quantities => "INSERT INTO unit_quantities (base_unit, quantity, status, comment) VALUES(?, ?, ?, ?)",
        variants => "INSERT INTO variants (variant, status) VALUES(?, ?)",
        variants_l10n => "INSERT INTO variants_l10n (locale, variant, locale_name, alt) VALUES(?, ?, ?, ?)",
        week_preferences => "INSERT INTO week_preferences (locale, ordering) VALUES(?, ?)",
    ];
    my $sths = {};
    
    # Prepare one statement handle per entry of $queries, which is a flat list of
    # ( table ID => SQL string ) pairs, and store it in $sths under the table ID.
    # Each ID processed is removed from $tables_to_query_check, so that whatever
    # remains in that hash afterwards is a table without a statement.
    # Dies if the same ID is listed twice or if a statement cannot be prepared.
    for( my $i = 0; $i < scalar( @$queries ); $i += 2 )
    {
        my $id = $queries->[$i];
        $out->print( "[${id}] " ) if( $DEBUG );
        my $sql = $queries->[$i + 1];
        # It is listed, but we skip it to make the 'tables_to_query_check' happy
        if( !defined( $sql ) )
        {
            delete( $tables_to_query_check->{ $id } );
            # Close the debug line opened above, so the next ID starts on its own line
            $out->print( "skipped\n" ) if( $DEBUG );
            next;
        }
        elsif( exists( $sths->{ $id } ) )
        {
            die( "There is already a statement object for ID '${id}' with SQL: ", $sths->{ $id }->{Statement} );
        }
        my $sth = $dbh->prepare( $sql ) ||
            die( "Error preparing query '", $sql, "': ", $dbh->errstr );
        $sths->{ $id } = $sth;
        $out->print( "ok\n" ) if( $DEBUG );
        if( exists( $tables_to_query_check->{ $id } ) )
        {
            delete( $tables_to_query_check->{ $id } );
        }
        else
        {
            # Not fatal: the statement is still usable, the ID is simply absent from the check map
            warn( "Warning only: No table '$id' found in our tables-to-query map check." );
        }
    }
    
    # Whatever is still listed in $tables_to_query_check at this point was never
    # matched by an entry of the statement list: treat that as fatal.
    {
        my @orphan_tables = sort( keys( %$tables_to_query_check ) );
        my $orphan_count = scalar( @orphan_tables );
        die( sprintf( "There are %d tables with no statement defined: %s", $orphan_count, join( ', ', @orphan_tables ) ) ) if( $orphan_count );
        &log( "All tables have a statement defined." );
    }
    
    # NOTE: Add meta information
    &log( "Add meta information." );
    my $today = DateTime->from_epoch( epoch => $opts->{created_time} );
    # $cldr_version is already declared as a global variable
    # cldr-common-45.0
    if( $opts->{cldr_version} )
    {
        $cldr_version = $opts->{cldr_version};
    }
    elsif( $topdir =~ /\-(\d+(?:\.\d+)*)$/ )
    {
        $cldr_version = $1;
    }
    else
    {

scripts/create_database.pl  view on Meta::CPAN

                $code =~ tr/_/-/;
            }
            # A 3-digits code like 001 that got truncated, because it turned into an integer.
            # The padding is restored before $territory is derived from $code, so that the
            # territory lookup below uses the same 3-digits value as $code itself.
            if( $code =~ /^\d{1,2}$/ )
            {
                $code = sprintf( '%03d', $code );
            }
            # A hyphenated code is split in two on its first hyphen: locale, then territory.
            # Anything else is taken as a territory code on its own.
            if( index( $code, '-' ) != -1 )
            {
                ( $locale, $territory ) = split( '-', $code, 2 );
            }
            else
            {
                $territory = $code;
            }
            # The CLDR uses 001 (World) to signify the default value.
            # We set the default value in the SQL schema, so we do not need this.
            # if( $code eq '001' )
            # {
            #     next;
            # }
            # elsif( !exists( $territoryInfo->{ $territory } ) )
            if( !exists( $territoryInfo->{ $territory } ) )
            {
                die( "Unknown territory code '${territory}' for property 'region' with value '${code}'. Not previously defined in CLDR as a territory." );
            }
    
            eval
            {
                $sth->bind_param( 1, "$code", SQL_VARCHAR );
                $sth->bind_param( 2, $territory, SQL_VARCHAR );
                $sth->bind_param( 3, $locale, SQL_VARCHAR );
                $sth->bind_param( 4, $pref, SQL_VARCHAR );
                $sth->bind_param( 5, to_array( $allowed ), SQL_VARCHAR );
                $sth->execute;
            } || die( "Error adding time formatting information for region '${code}': ", ( $@ || $sth->errstr ) );
            $n++;
            $out->print( "ok\n" ) if( $DEBUG );
        }
    }
    &log( "Time formatting added to ${n} territories." );
    
    # NOTE: Loading week of preference
    # Reads every weekOfPreference element found in the supplemental data document
    # and inserts one row per locale it lists, using the week_preferences statement.
    # $n counts the rows inserted.
    &log( "Loading week of preference." );
    $n = 0;
    my $weekPrefsRest = $suppDoc->findnodes( '//weekData/weekOfPreference' ) ||
        die( "Unable to get week of preferences information from ${supplemental_data_file}" );
    $weekPrefsRest->size ||
        die( "No week of preferences information found in ${supplemental_data_file}" );
    $sth = $sths->{week_preferences} || die( "No SQL statement object for week_preferences" );
    # Example: <weekOfPreference ordering="weekOfYear weekOfDate weekOfMonth" locales="fi zh_TW"/>
    while( my $pref_el = $weekPrefsRest->shift )
    {
        my $locales_attr = $pref_el->getAttribute( 'locales' ) ||
            die( "No attribute 'locales' for this element: ", $pref_el->toString() );
        # Underscores in the attribute value (zh_TW) are turned into hyphens (zh-TW)
        $locales_attr =~ tr/_/-/;
        my @week_locales = split( /[[:blank:]\h\v]+/, $locales_attr );
        my $ordering_attr = $pref_el->getAttribute( 'ordering' ) ||
            die( "No attribute 'ordering' for this element: ", $pref_el->toString() );
        my $ordering = [split( /[[:blank:]\h\v]+/, $ordering_attr )];
        foreach my $locale ( @week_locales )
        {
            # Should not be needed, but better safe than sorry:
            # a locale containing 'root' gets 'und' as its language instead
            if( index( $locale, 'root' ) != -1 )
            {
                if( length( $locale ) <= 4 )
                {
                    $locale = 'und';
                }
                else
                {
                    my $und_locale = Locale::Unicode->new( $locale );
                    $und_locale->language( 'und' );
                    $locale = $und_locale->as_string;
                }
            }

            $out->print( "[${locale}] " ) if( $DEBUG );
            # A false return value from the eval covers both an exception and a failed execute
            my $inserted = eval
            {
                $sth->execute( $locale, to_array( $ordering ) );
            };
            $inserted || die( "Error adding week of preference information for locale '${locale}': ", ( $@ || $sth->errstr ) );
            $out->print( "ok\n" ) if( $DEBUG );
            $n++;
        }
    }
    &log( "${n} week of preference information added." );
    
    # NOTE: Loading code mappings
    # For each kind of mapping listed in $code_mappings (territory, then currency),
    # fetches the matching elements from the supplemental data document and inserts
    # one code_mappings row per element. $n counts the rows inserted.
    &log( "Loading code mappings." );
    $n = 0;
    $sth = $sths->{code_mappings} || die( "No SQL statement object for code_mappings" );
    my $code_mappings =
    [
        { type => 'territory', xpath => '//codeMappings/territoryCodes' },
        { type => 'currency', xpath => '//codeMappings/currencyCodes' },
    ];
    foreach my $mapping ( @$code_mappings )
    {
        my $mapRes = $suppDoc->findnodes( $mapping->{xpath} ) ||
            die( "Unable to get the $mapping->{type} information in file ${supplemental_data_file}" );
        die( "No $mapping->{type} information found in file ${supplemental_data_file}" ) if( !$mapRes->size );
        while( my $map_el = $mapRes->shift )
        {
            # The 'type' attribute holds the code itself and is mandatory;
            # the other attributes are stored as returned by getAttribute
            my $code = $map_el->getAttribute( 'type' ) ||
                die( "Unable to get attribute 'type' for this code mapping element: ", $map_el->toString() );
            my $def =
            {
                code => $code,
                alpha3 => $map_el->getAttribute( 'alpha3' ),
                numeric => $map_el->getAttribute( 'numeric' ),
                fips10 => $map_el->getAttribute( 'fips10' ),
                type => $mapping->{type},
            };
            $out->print( "$def->{type} / [$def->{code}] " ) if( $DEBUG );
            # A false return value from the eval covers both an exception and a failed execute
            my $inserted = eval
            {
                $sth->execute( map( $def->{ $_ }, qw( code alpha3 numeric fips10 type ) ) );
            };
            $inserted || die( "Error adding code mapping information for code '$def->{code}' of type $mapping->{type}: ", ( $@ || $sth->errstr ), "\n", dump( $def ) );
            $out->print( "ok\n" ) if( $DEBUG );
            $n++;
        }
    }
    &log( "${n} code mappings added." );
    
    # NOTE: Loading person name defaults
    &log( "Loading person name defaults." );
    $n = 0;
    my $nameOrderRes = $suppDoc->findnodes( '//personNamesDefaults/nameOrderLocalesDefault' ) ||
        die( "Unable to get the name order locale information from file ${supplemental_data_file}" );
    if( !$nameOrderRes->size )
    {
        die( "No name order locale information found in file ${supplemental_data_file}" );
    }
    $sth = $sths->{person_name_defaults} || die( "No SQL statement object for person_name_defaults" );
    # Example: <nameOrderLocalesDefault order="surnameFirst">hu ja km ko mn si ta te vi yue zh</nameOrderLocalesDefault>
    while( my $el = $nameOrderRes->shift )
    {
        my $value = $el->getAttribute( 'order' ) ||
            die( "No attribute 'order' found for this person name defaults element: ", $el->toString() );

scripts/create_database.pl  view on Meta::CPAN

        my $def =
        {
            source => ( $el->getAttribute( 'source' ) || die( "Unable to get the unit source in the attribute 'source' for this element in file $units_file: ", $el->toString() ) ),
            base_unit => ( $el->getAttribute( 'baseUnit' ) || die( "Unable to get the base unit value in the attribute 'baseUnit' for this element in file $units_file: ", $el->toString() ) ),
        };
        $out->print( "[$def->{base_unit}] " ) if( $DEBUG );
    
        my $this = $el->previousNonBlankSibling;
        if( $this && $this->isa( 'XML::LibXML::Comment' ) )
        {
            my $temp_cat = trim( $this->data );
            if( defined( $temp_cat ) &&
                $temp_cat =~ /^[a-zA-Z][a-zA-Z]+(?:\-[a-zA-Z][a-zA-Z0-9]+)*$/ )
            {
                $cat = $temp_cat;
            }
        }
        $def->{category} = $cat if( defined( $cat ) );
        $out->print( defined( $cat ) ? "-> ${cat} " : '-> no category ' ) if( $DEBUG );
    
        if( $el->hasAttribute( 'factor' ) )
        {
            my $expr = $def->{expression} = $el->getAttribute( 'factor' ) ||
                die( "Unable to get the unit conversion expression from the attribute 'factor' for this element: ", $el->toString() );
            if( $expr =~ s/($units_constants_re)/$unit_constants->{ $1 }/g )
            {
                local $@;
                $def->{factor} = eval( $expr );
                if( $@ )
                {
                    die( "Error evaluating the constant expression '${expr}' (originally '$def->{expression}') in file $units_file: $@" );
                }
            }
            elsif( index( $def->{expression}, '*' ) != -1 ||
                   index( $def->{expression}, '/' ) != -1 )
            {
                local $@;
                $def->{factor} = eval( $def->{expression} );
                if( $@ )
                {
                    die( "Error evaluating the constant expression '$def->{expression}' in file $units_file: $@" );
                }
            }
        }
    
        $def->{systems} = [split( /[[:blank:]\h]+/, ( $el->getAttribute( 'systems' ) || '' ) )];
    
        eval
        {
            $sth->execute( @$def{qw( source base_unit expression factor )}, to_array( $def->{systems} ), $def->{category} );
        } || die( "Error adding unit conversion information for source '$def->{source}' and base unit '$def->{base_unit}' in file $units_file: ", ( $@ || $sth->errstr ), "\n", dump( $def ) );
        $n++;
        $j++;
        $out->print( "ok\n" ) if( $DEBUG );
    }
    &log( "${j} unit conversions added." );
    
    # NOTE: Loading unit preferences
    &log( "Loading unit preferences." );
    $j = 0;
    $sth = $sths->{unit_prefs} || die( "No statement object for 'unit_prefs'" );
    while( my $el = $unitsPrefsRes->shift )
    {
        my $cat = $el->getAttribute( 'category' ) ||
            die( "Unable to get the unit preferences category from attribute 'category' for this element in file $units_file: ", $el->toString() );
        my $usage = $el->getAttribute( 'usage' ) ||
            die( "Unable to get the unit preferences usage from attribute 'usage' for this element in file $units_file: ", $el->toString() );
        my $prefsRes = $el->findnodes( './unitPreference' ) ||
            die( "Unable to get unit preferences for the category '${cat}' and usage '${usage}' for this element in file $units_file: ", $el->toString() );
        # Example: <unitPreference regions="001" geq="10" skeleton="precision-increment/10">meter</unitPreference>
        while( my $el_pref = $prefsRes->shift )
        {
            my $def =
            {
                unit_id => ( $el_pref->textContent || die( "No content found for this preference element in file $units_file: ", $el_pref->toString() ) ),
                category => $cat,
                usage => $usage,
            };
            $out->print( "[$def->{unit_id}] " ) if( $DEBUG );
            if( $el_pref->hasAttribute( 'geq' ) )
            {
                $def->{geq} = $el_pref->getAttribute( 'geq' );
            }
            if( $el_pref->hasAttribute( 'skeleton' ) )
            {
                $def->{skeleton} = $el_pref->getAttribute( 'skeleton' );
            }
            my $regions = [split( /[[:blank:]\h]+/, ( $el_pref->getAttribute( 'regions' ) || '' ) )];
            foreach my $region ( @$regions )
            {
                $def->{territory} = $region;
                $out->print( "${region} " ) if( $DEBUG );
                eval
                {
                    $sth->bind_param( 1, $def->{unit_id}, SQL_VARCHAR );
                    $sth->bind_param( 2, "$def->{territory}", SQL_VARCHAR );
                    $sth->bind_param( 3, $def->{category}, SQL_VARCHAR );
                    $sth->bind_param( 4, $def->{usage}, SQL_VARCHAR );
                    $sth->bind_param( 5, $def->{geq}, SQL_FLOAT );
                    $sth->bind_param( 6, $def->{skeleton}, SQL_VARCHAR );
                    $sth->execute;
                } || die( "Error adding unit preference information for category '$def->{category}', usage '$def->{usage}', unit ID '$def->{unit_id}' and territory '$def->{territory}' in file $units_file: ", ( $@ || $sth->errstr ), "\n", dump( $def )...
                $n++;
                $j++;
            }
            $out->print( "ok\n" ) if( $DEBUG );
        }
    }
    # $j was reset to 0 before the unit_prefs loop and incremented once per row inserted by it
    &log( "${j} unit preferences added." );
    
    # NOTE: Loading unit aliases
    &log( "Loading unit aliases." );
    $j = 0;
    $sth = $sths->{unit_aliases} || die( "No statement object for 'unit_aliases'" );
    # Example: <unitAlias type="inch-hg" replacement="inch-ofhg" reason="deprecated"/>
    while( my $el = $unitsAliasesRes->shift )
    {
        my $def =
        {
            alias => ( $el->getAttribute( 'type' ) || die( "Unable to get the unit alias in the attribute 'type' for this element in file $units_file: ", $el->toString() ) ),
            target => ( $el->getAttribute( 'replacement' ) || die( "Unable to get the alias replacement value in the attribute 'replacement' for this element in file $units_file: ", $el->toString() ) ),
            reason => ( $el->getAttribute( 'reason' ) || die( "Unable to get the alias replacement reason value in the attribute 'reason' for this element in file $units_file: ", $el->toString() ) ),
        };
        $out->print( "[$def->{alias} -> $def->{target}] " ) if( $DEBUG );
    
        eval
        {
            $sth->execute( @$def{qw( alias target reason )} );
        } || die( "Error adding unit alias information for alias '$def->{alias}' and target '$def->{target}' in file $units_file: ", ( $@ || $sth->errstr ), "\n", dump( $def ) );
        $n++;
        $j++;



( run in 0.608 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )