unicode results from the CPAN

unicode

Convert-Binary-C

view release on metacpan or search on metacpan

tests/include/pdclib/auxiliary/uctype/CMakeLists.txt
tests/include/pdclib/auxiliary/uctype/derived_properties.c
tests/include/pdclib/auxiliary/uctype/derived_properties.h
tests/include/pdclib/auxiliary/uctype/main.c
tests/include/pdclib/auxiliary/uctype/Makefile
tests/include/pdclib/auxiliary/uctype/test.h
tests/include/pdclib/auxiliary/uctype/text_utilities.c
tests/include/pdclib/auxiliary/uctype/text_utilities.h
tests/include/pdclib/auxiliary/uctype/uctype.c
tests/include/pdclib/auxiliary/uctype/uctype.h
tests/include/pdclib/auxiliary/uctype/unicode_data.c
tests/include/pdclib/auxiliary/uctype/unicode_data.h
tests/include/pdclib/CMakeLists.txt
tests/include/pdclib/COPYING.CC0
tests/include/pdclib/functions/_dlmalloc/malloc-2.8.6.patch
tests/include/pdclib/functions/_dlmalloc/malloc.c
tests/include/pdclib/functions/_PDCLIB/_PDCLIB_atomax.c
tests/include/pdclib/functions/_PDCLIB/_PDCLIB_bigint.c
tests/include/pdclib/functions/_PDCLIB/_PDCLIB_bigint10.c
tests/include/pdclib/functions/_PDCLIB/_PDCLIB_bigint2.c
tests/include/pdclib/functions/_PDCLIB/_PDCLIB_bigint32.c
tests/include/pdclib/functions/_PDCLIB/_PDCLIB_bigint64.c

ppport.h view on Meta::CPAN

parse_label|5.013007|5.013007|x
parse_listexpr|5.013008|5.013008|x
parse_lparen_question_flags|5.017009||Viu
PARSE_OPTIONAL|5.013007|5.013007|
parser_dup|5.009000|5.009000|u
parser_free|5.009005||Viu
parser_free_nexttoke_ops|5.017006||Viu
parse_stmtseq|5.013006|5.013006|x
parse_subsignature|5.031003|5.031003|x
parse_termexpr|5.013008|5.013008|x
parse_unicode_opts|5.008001||Viu
parse_uniprop_string|5.027011||Viu
PATCHLEVEL|5.003007||Viu
path_is_searchable|5.019001||Vniu
Pause|5.003007||Viu
pause|5.005000||Viu
pclose|5.003007||Viu
peep|5.003007||Viu
pending_ident|5.017004||Viu
PERL_ABS|5.008001|5.003007|p
Perl_acos|5.021004|5.021004|n

ppport.h view on Meta::CPAN

PL_threadhook|5.008000||Viu
PL_tmps_floor|5.005000||Viu
PL_tmps_ix|5.005000||Viu
PL_tmps_max|5.005000||Viu
PL_tmps_stack|5.005000||Viu
PL_tokenbuf||5.003007|ponu
PL_top_env|5.005000||Viu
PL_toptarget|5.005000||Viu
PL_TR_SPECIAL_HANDLING_UTF8|5.031006||Viu
PL_underlying_numeric_obj|5.027009||Viu
PL_unicode|5.008001||Viu
PL_unitcheckav|5.009005||Viu
PL_unitcheckav_save|5.009005||Viu
PL_unlockhook|5.007003||Viu
PL_unsafe|5.005000||Viu
PL_UpperLatin1|5.019005||Viu
PLUS|5.003007||Viu
PLUS_t8|5.035004||Viu
PLUS_t8_p8|5.033003||Viu
PLUS_t8_pb|5.033003||Viu
PLUS_tb|5.035004||Viu

ppport.h view on Meta::CPAN

#endif
#ifndef PERL_PV_PRETTY_DUMP
#  define PERL_PV_PRETTY_DUMP            PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE
#endif

#ifndef PERL_PV_PRETTY_REGPROP
#  define PERL_PV_PRETTY_REGPROP         PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_LTGT|PERL_PV_ESCAPE_RE
#endif

/* Hint: pv_escape
 * Note that unicode functionality is only backported to
 * those perl versions that support it. For older perl
 * versions, the implementation will fall back to bytes.
 */

#ifndef pv_escape
#if defined(NEED_pv_escape)
static char * DPPP_(my_pv_escape)(pTHX_ SV * dsv, char const * const str, const STRLEN count, const STRLEN max, STRLEN * const escaped, const U32 flags);
static
#else
extern char * DPPP_(my_pv_escape)(pTHX_ SV * dsv, char const * const str, const STRLEN count, const STRLEN max, STRLEN * const escaped, const U32 flags);

tests/include/pdclib/auxiliary/uctype/CMakeLists.txt view on Meta::CPAN

# Project for the get_uctypes tool; only a C compiler is enabled.
project( get_uctypes LANGUAGES C )

# Compile as ISO C11: the standard is mandatory and compiler-specific
# extensions are switched off.
set( CMAKE_C_STANDARD 11 )
set( CMAKE_C_STANDARD_REQUIRED ON )
set( CMAKE_C_EXTENSIONS OFF )

# Translation units that make up the "uctype" helper library.
set( uctype_SOURCES
     derived_properties.c
     text_utilities.c
     uctype.c
     unicode_data.c
)

# Headers accompanying the sources above.
# NOTE(review): no use of this variable is visible in this part of the
# file -- confirm whether it is referenced further down.
set( uctype_HEADERS
     derived_properties.h
     test.h
     text_utilities.h
     uctype.h
     unicode_data.h
)

# Static helper library built from the files listed in uctype_SOURCES.
add_library( uctype STATIC ${uctype_SOURCES} )
target_include_directories( uctype PRIVATE ${CMAKE_SOURCE_DIR} )

# The generator executable itself; it only adds main.c on top of the library.
add_executable( get_uctypes main.c )
target_include_directories( get_uctypes PRIVATE ${CMAKE_SOURCE_DIR} )
# Use the keyword signature: the keyword-less form has legacy semantics, and
# an executable never needs to propagate its link dependencies to consumers.
target_link_libraries( get_uctypes PRIVATE uctype )

foreach( file ${uctype_SOURCES} )

tests/include/pdclib/auxiliary/uctype/derived_properties.h view on Meta::CPAN


   This file is part of the Public Domain C Library (PDCLib).
   Permission is granted to use, modify, and / or redistribute at will.
*/

#ifndef DERIVED_PROPERTIES
#define DERIVED_PROPERTIES DERIVED_PROPERTIES

#include <stddef.h>

/* https://www.unicode.org/reports/tr44/#DerivedCoreProperties.txt */

/* Set of named code point properties, as read from a file in the
   DerivedCoreProperties.txt format.
   NOTE(review): the field descriptions below are inferred from the field
   names and from the age->name[ index ] access in main.c -- confirm them
   against derived_properties.c.
*/
struct derived_properties_t
{
    size_t count;         /* presumably the number of properties stored */
    char ** name;         /* property names, indexed by property number */
    size_t * begin;       /* presumably per-property start index into code_points */
    size_t * end;         /* presumably per-property end index into code_points */
    size_t * code_points; /* presumably the code points of all properties */
};

tests/include/pdclib/auxiliary/uctype/main.c view on Meta::CPAN

/* RLE Compressed Output

   <wctype.h> requires *11* flags:
   iswupper, iswlower, iswalpha, iswdigit, iswblank, iswspace,
   iswcntrl, iswxdigit, iswgraph, iswprint, iswpunct.
   iswalnum (the 12th classification function) is *defined* as
   iswalpha || iswdigit. And iswdigit and iswxdigit are defined
   in a rather restrictive way that can be expressed by simple
   ranges instead of lookup tables. And iswgraph is defined as
   iswprint && ! iswspace (which is trivial to check that it holds
   true for all the records provided by get-unicode-ctype, at
   least up to Unicode 11.0).
   So we have only 8 flags we actually need in a lookup... nicely
   reducing the storage requirement to an unsigned char.

   Another trick is to express toupper / tolower as offsets
   instead of absolute values, which will allow run-length
   compression of the data.
*/

struct output_record_t

tests/include/pdclib/auxiliary/uctype/main.c view on Meta::CPAN

        --index;

        if ( lookup_property( age, age->name[ index ], codepoint ) )
        {
            printf( "%s", age->name[ index ] );
            return;
        }
    }
}

static void print_additional_codepoint_info( size_t codepoint, struct unicode_record_t * ur )
{
    printf( " - %s",      ur->name );
    printf( " - %s",      ur->general_category );
    printf( " - %d",      ur->canonical_combining_class );
    printf( " - %s",      ur->bidi_class );
    printf( " - %s",      ( ur->decomposition ? ur->decomposition : "NULL" ) );
    printf( " - %d",      ur->numeric_type );
    printf( " - %d",      ur->numeric_digit );
    printf( " - %s",      ( ur->numeric_value ? ur->numeric_value : "NULL" ) );
    printf( " - %c",      ur->bidi_mirrored );

tests/include/pdclib/auxiliary/uctype/main.c view on Meta::CPAN

    printf( "%d", ( iswdigit( codepoint ) )  ? 1 : 0 );
    printf( "%d", ( iswblank( codepoint ) )  ? 1 : 0 );
    printf( "%d", ( iswspace( codepoint ) )  ? 1 : 0 );
    printf( "%d", ( iswcntrl( codepoint ) )  ? 1 : 0 );
    printf( "%d", ( iswxdigit( codepoint ) ) ? 1 : 0 );
    printf( "%d", ( iswgraph( codepoint ) )  ? 1 : 0 );
    printf( "%d", ( iswprint( codepoint ) )  ? 1 : 0 );
    printf( "%d", ( iswpunct( codepoint ) )  ? 1 : 0 );
}

static void print_codepoint_info( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core, struct derived_properties_t * age )
{
    int rc;
    int equal = 1;

    if ( codepoint % 20 == 0 )
    {
        printf( "   cp      up       low    UlA0_WCXGP.\n" );
    }

    printf( "U+%06zX ", codepoint );

tests/include/pdclib/auxiliary/uctype/main.c view on Meta::CPAN

    if ( ! equal )
    {
        printf( " ERROR: Deviation from SysLib: " );
        print_codepoint_age( codepoint, age );
        print_additional_codepoint_info( codepoint, ur );
    }

    printf( "\n" );
}
#else
static struct output_record_t get_output_record( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
{
    struct output_record_t rc;
    char buffer[ 9 ];

    rc.codepoint = codepoint;
    rc.toupper_diff = get_towupper( codepoint, ur ) - codepoint;
    rc.tolower_diff = get_towlower( codepoint, ur ) - codepoint;

    sprintf( buffer, "%zu%zu%zu%zu%zu%zu%zu%zu",
             get_iswupper( codepoint, ur, core ),

tests/include/pdclib/auxiliary/uctype/main.c view on Meta::CPAN

             get_iswpunct( codepoint, ur, core ) );

    rc.flags = strtoul( buffer, NULL, 2 );

    return rc;
}
#endif

int main( int argc, char * argv[] )
{
    struct unicode_data_t * ud;
    struct derived_properties_t * core;
#ifdef TEST
    struct derived_properties_t * age;
#endif

    char * locale = setlocale( LC_CTYPE, "" );

    if ( ! strstr( locale, "UTF-8" ) || strstr( locale, "TR" ) || strstr( locale, "tr" ) )
    {
        fprintf( stderr, "Need non-turkish locale to work correctly.\n'%s' will not do.\n", locale );

tests/include/pdclib/auxiliary/uctype/main.c view on Meta::CPAN

                "Usage: get-uctypes <UnicodeData.txt> <DerivedCoreProperties.txt>"
#ifdef TEST
                " <DerivedAge.txt>"
#endif
                "\n\n"
                "Generates lookup tables for <wctype.h> from files available from\n"
                "the Unicode Consortium.\n"
                "\n"
                "The required files can be retrieved from the following URL:\n"
                "\n"
                "http://www.unicode.org/Public/UCD/latest/ucd/\n"
                "\n" );
        return EXIT_FAILURE;
    }

    if ( ( ud = read_unicode_data( argv[ 1 ] ) ) != NULL )
    {
        if ( ( core = read_derived_properties( argv[ 2 ] ) ) != NULL )
        {
#ifndef TEST
            /* Print (to file) RLE compressed data */
            FILE * fh = fopen( "ctype.dat", "wb" );

            if ( fh )
            {
                size_t codepoint = 0;
                size_t i = 0;
                struct unicode_record_t * ur = &( ud->records[i] );
                /* Name substring indicating a code point _range_ */
                const char * last = ", Last>";

                struct output_record_t previous = get_output_record( codepoint, ur, core );

                fprintf( fh, "%zx ", previous.codepoint );

                for ( codepoint = 1; codepoint < 0x10fffe; ++codepoint )
                {
                    struct output_record_t current;

tests/include/pdclib/auxiliary/uctype/main.c view on Meta::CPAN

            if ( ( age = read_derived_properties( argv[ 3 ] ) ) != NULL )
            {
                /* Print (to screen) raw data comparing our results
                   to the system library.
                   Differences are often because the system library
                   uses older data, which is why we add the age to
                   the output.
                */
                size_t codepoint = 0;
                size_t i = 0;
                struct unicode_record_t * ur = &( ud->records[i] );
                /* Name substring indicating a code point _range_ */
                const char * last = ", Last>";

                for ( codepoint = 0; codepoint < 0x10fffe; ++codepoint )
                {
                    while ( codepoint > ur->code_point )
                    {
                        ur = &( ud->records[++i] );
                    }

tests/include/pdclib/auxiliary/uctype/main.c view on Meta::CPAN

                }

                release_derived_properties( age );
            }

#endif

            release_derived_properties( core );
        }

        release_unicode_data( ud );
    }

    return EXIT_SUCCESS;
}

tests/include/pdclib/auxiliary/uctype/uctype.c view on Meta::CPAN

/* uctype

   This file is part of the Public Domain C Library (PDCLib).
   Permission is granted to use, modify, and / or redistribute at will.
*/

#include "uctype.h"

/* Uppercase mapping of a code point.
   Yields the record's simple uppercase mapping when towupper_differs()
   reports one for this code point, and the code point itself otherwise.
*/
size_t get_towupper( size_t codepoint, struct unicode_record_t * ur )
{
    if ( towupper_differs( ur, codepoint ) )
    {
        return ur->simple_uppercase_mapping;
    }

    return codepoint;
}

/* Lowercase mapping of a code point.
   Yields the record's simple lowercase mapping when towlower_differs()
   reports one for this code point, and the code point itself otherwise.
*/
size_t get_towlower( size_t codepoint, struct unicode_record_t * ur )
{
    if ( towlower_differs( ur, codepoint ) )
    {
        return ur->simple_lowercase_mapping;
    }

    return codepoint;
}

/* A code point counts as uppercase if it has a differing lowercase mapping,
   or if it carries the derived core property "Uppercase".
   Returns 1 or 0.
*/
size_t get_iswupper( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
{
    if ( towlower_differs( ur, codepoint ) )
    {
        return 1;
    }

    return lookup_property( core, "Uppercase", codepoint ) ? 1 : 0;
}

/* A code point counts as lowercase if it has a differing uppercase mapping,
   or if it carries the derived core property "Lowercase".
   Returns 1 or 0.
*/
size_t get_iswlower( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
{
    if ( towupper_differs( ur, codepoint ) )
    {
        return 1;
    }

    return lookup_property( core, "Lowercase", codepoint ) ? 1 : 0;
}

/* A code point counts as alphabetic if it carries the derived core property
   "Alphabetic", or if its general category is "Nd" while it is not one of
   the digits accepted by get_iswdigit().
   Returns 1 or 0.
*/
size_t get_iswalpha( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
{
    if ( lookup_property( core, "Alphabetic", codepoint ) )
    {
        return 1;
    }

    if ( ! is_general_category( ur, "Nd" ) )
    {
        return 0;
    }

    return ! get_iswdigit( codepoint );
}

/* Only the code points U+0030 through U+0039 count as digits.
   Returns 1 or 0.
*/
size_t get_iswdigit( size_t codepoint )
{
    if ( codepoint < 0x0030 )
    {
        return 0;
    }

    return codepoint <= 0x0039;
}

/* Hexadecimal digits are the digits accepted by get_iswdigit() plus the
   ranges U+0041..U+0046 and U+0061..U+0066.
   Returns 1 or 0.
*/
size_t get_iswxdigit( size_t codepoint )
{
    if ( get_iswdigit( codepoint ) )
    {
        return 1;
    }

    if ( codepoint >= 0x0041 && codepoint <= 0x0046 )
    {
        return 1;
    }

    return codepoint >= 0x0061 && codepoint <= 0x0066;
}

/* Blank characters are U+0009 and every code point of general category
   "Zs" whose decomposition does not contain "<noBreak>".
   Returns 1 or 0.
*/
size_t get_iswblank( size_t codepoint, struct unicode_record_t * ur )
{
    if ( codepoint == 0x0009 )
    {
        return 1;
    }

    if ( ! is_general_category( ur, "Zs" ) )
    {
        return 0;
    }

    return ! decomposition_contains( ur, "<noBreak>" );
}

/* Whitespace is: general category "Zl" or "Zp"; general category "Zs"
   unless the decomposition contains "<noBreak>"; U+0020; and the range
   U+0009..U+000D.
   Returns 1 or 0.
*/
size_t get_iswspace( size_t codepoint, struct unicode_record_t * ur )
{
    if ( is_general_category( ur, "Zl" ) || is_general_category( ur, "Zp" ) )
    {
        return 1;
    }

    if ( is_general_category( ur, "Zs" ) && ! decomposition_contains( ur, "<noBreak>" ) )
    {
        return 1;
    }

    if ( codepoint == 0x0020 )
    {
        return 1;
    }

    return codepoint >= 0x0009 && codepoint <= 0x000D;
}

/* Control characters are those of general category "Zl" or "Zp" and those
   whose record name is "<control>". The codepoint argument is not consulted.
   Returns 1 or 0.
*/
size_t get_iswcntrl( size_t codepoint, struct unicode_record_t * ur )
{
    if ( is_general_category( ur, "Zl" ) )
    {
        return 1;
    }

    if ( is_general_category( ur, "Zp" ) )
    {
        return 1;
    }

    return has_name( ur, "<control>" ) ? 1 : 0;
}

/* Graphic characters are everything that is neither of general category
   "Cs", nor named "<control>", nor whitespace as per get_iswspace().
   Returns 1 or 0.
*/
size_t get_iswgraph( size_t codepoint, struct unicode_record_t * ur )
{
    if ( is_general_category( ur, "Cs" ) )
    {
        return 0;
    }

    if ( has_name( ur, "<control>" ) )
    {
        return 0;
    }

    return ! get_iswspace( codepoint, ur );
}

/* Printable characters are everything except general categories "Zp",
   "Zl" and "Cs" and records named "<control>". The codepoint argument is
   not consulted.
   Returns 1 or 0.
*/
size_t get_iswprint( size_t codepoint, struct unicode_record_t * ur )
{
    return ! ( is_general_category( ur, "Zp" )
            || is_general_category( ur, "Zl" )
            || is_general_category( ur, "Cs" )
            || has_name( ur, "<control>" ) );
}

/* Punctuation is what remains after excluding alphabetic characters,
   digits, records named "<control>", whitespace, and general category "Cs".
   Returns 1 or 0.
*/
size_t get_iswpunct( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core )
{
    if ( get_iswalpha( codepoint, ur, core ) || get_iswdigit( codepoint ) )
    {
        return 0;
    }

    if ( has_name( ur, "<control>" ) || get_iswspace( codepoint, ur ) )
    {
        return 0;
    }

    return ! is_general_category( ur, "Cs" );
}

#ifdef TEST

#include "test.h"

int main( void )
{

tests/include/pdclib/auxiliary/uctype/uctype.h view on Meta::CPAN

/* uctype data

   This file is part of the Public Domain C Library (PDCLib).
   Permission is granted to use, modify, and / or redistribute at will.
*/

#ifndef UCTYPE
#define UCTYPE

#include "derived_properties.h"
#include "unicode_data.h"

/* Case mapping: the record's simple upper- / lowercase mapping if it
   differs from the code point, the code point itself otherwise.
*/
size_t get_towupper( size_t codepoint, struct unicode_record_t * ur );
size_t get_towlower( size_t codepoint, struct unicode_record_t * ur );
/* Classification predicates mirroring the <wctype.h> functions of the
   same name; each yields nonzero if the code point belongs to the class.
   Where present, ur is the Unicode data record and core the derived core
   properties that the answer is looked up in.
   NOTE(review): ur is presumably expected to be the record describing
   codepoint -- confirm against the callers in main.c.
*/
size_t get_iswupper( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core );
size_t get_iswlower( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core );
size_t get_iswalpha( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core );
/* The two digit predicates are pure range checks and need no record. */
size_t get_iswdigit( size_t codepoint );
size_t get_iswxdigit( size_t codepoint );
size_t get_iswblank( size_t codepoint, struct unicode_record_t * ur );
size_t get_iswspace( size_t codepoint, struct unicode_record_t * ur );
size_t get_iswcntrl( size_t codepoint, struct unicode_record_t * ur );
size_t get_iswgraph( size_t codepoint, struct unicode_record_t * ur );
size_t get_iswprint( size_t codepoint, struct unicode_record_t * ur );
size_t get_iswpunct( size_t codepoint, struct unicode_record_t * ur, struct derived_properties_t * core );

#endif

tests/include/pdclib/auxiliary/uctype/unicode_data.c view on Meta::CPAN

/* unicode data

   This file is part of the Public Domain C Library (PDCLib).
   Permission is granted to use, modify, and / or redistribute at will.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "text_utilities.h"

#include "unicode_data.h"

#define LINE_BUFFER_SIZE 500u

struct unicode_data_t * read_unicode_data( const char * filename )
{
    FILE * fh;
    char buffer[ LINE_BUFFER_SIZE ];
    struct unicode_data_t * ud = NULL;
    size_t lines;

    if ( ( fh = fopen( filename, "r" ) ) == NULL )
    {
        fprintf( stderr, "Could not open '%s' for reading.\n", filename );
        return NULL;
    }

    if ( ( lines = check_file( fh, LINE_BUFFER_SIZE, ';', sizeof( unicode_record_fields ) / sizeof( int ), unicode_record_fields ) ) != ( size_t )-1 )
    {
        if ( ( ud = malloc( sizeof( struct unicode_data_t ) ) ) )
        {
            ud->size = lines;

            if ( ( ud->records = calloc( lines, sizeof( struct unicode_record_t ) ) ) )
            {
                size_t i;

                for ( i = 0; i < lines; ++i )
                {
                    char * p;

                    fgets( buffer, LINE_BUFFER_SIZE, fh );
                    ud->records[ i ].code_point = strtoul( next_token( buffer, ';' ), NULL, 16 );
                    p = next_token( NULL, ';' );

tests/include/pdclib/auxiliary/uctype/unicode_data.c view on Meta::CPAN

        else
        {
            fprintf( stderr, "Memory allocation failure.\n" );
        }
    }

    fclose( fh );
    return ud;
}

/* Returns 1 if the record's name is exactly the given string, 0 otherwise. */
int has_name( struct unicode_record_t * ur, const char * name )
{
    int difference = strcmp( ur->name, name );

    return ! difference;
}

/* Returns 1 if the record's name ends with the given suffix, 0 otherwise.

   The tail of the record name is compared directly instead of searching
   with strstr(): strstr() reports the FIRST occurrence only, so a suffix
   that also appears earlier in the name would be missed. A suffix longer
   than the name can never match and is rejected up front, which avoids
   computing a pointer in front of the name. An empty suffix matches any
   name.
*/
int name_ends_with( struct unicode_record_t * ur, const char * name )
{
    size_t name_length = strlen( ur->name );
    size_t suffix_length = strlen( name );

    if ( suffix_length > name_length )
    {
        return 0;
    }

    return strcmp( ur->name + ( name_length - suffix_length ), name ) == 0;
}

/* Returns 1 if the record's general category is exactly the given string,
   0 otherwise.
*/
int is_general_category( struct unicode_record_t * ur, const char * category )
{
    int difference = strcmp( ur->general_category, category );

    return ! difference;
}

/* Returns 1 if the record has a decomposition and that decomposition
   contains the given substring, 0 otherwise.
*/
int decomposition_contains( struct unicode_record_t * ur, const char * substring )
{
    if ( ! ur->decomposition )
    {
        /* No decomposition recorded, so nothing can be contained. */
        return 0;
    }

    return strstr( ur->decomposition, substring ) != NULL;
}

/* Returns 1 if the record holds a nonzero simple uppercase mapping that is
   different from the given code point, 0 otherwise.
*/
int towupper_differs( struct unicode_record_t * ur, size_t codepoint )
{
    size_t upper = ur->simple_uppercase_mapping;

    if ( upper == 0 )
    {
        /* A mapping of zero is treated as "no mapping". */
        return 0;
    }

    return upper != codepoint;
}

/* Returns 1 if the record holds a nonzero simple lowercase mapping that is
   different from the given code point, 0 otherwise.
*/
int towlower_differs( struct unicode_record_t * ur, size_t codepoint )
{
    size_t lower = ur->simple_lowercase_mapping;

    if ( lower == 0 )
    {
        /* A mapping of zero is treated as "no mapping". */
        return 0;
    }

    return lower != codepoint;
}

void release_unicode_data( struct unicode_data_t * ud )
{
    size_t i;

    for ( i = 0; i < ud->size; ++i )
    {
        free( ud->records[i].name );
        free( ud->records[i].decomposition );
        free( ud->records[i].numeric_value );
    }

tests/include/pdclib/auxiliary/uctype/unicode_data.c view on Meta::CPAN


#ifdef TEST

#include "test.h"

#include <inttypes.h>

int main( void )
{
    FILE * fh = fopen( "test.txt", "w" );
    struct unicode_data_t * ud;
    int rc;

    TESTCASE( fh != NULL );
    TESTCASE( fprintf( fh, "%04x;%s;%s;%d;%s;;;;;%c;%s;;;;\n", 0, "<control>", "Cc", 0, "BN", 'N', "NULL" ) == 38 );
    TESTCASE( ( rc = fprintf( fh, "%04x;%s;%s;%d;%s;%s;;;%s;%c;;;%04x;;%04x\n", 0x2170, "SMALL ROMAN NUMERAL ONE", "Nl", 0, "L", "<compat> 0069", "1", 'N', 0x2160, 0x2160 ) ) == 69 );

    fclose( fh );
    ud = read_unicode_data( "test.txt" );
    remove( "test.txt" );

    TESTCASE( ud != NULL );
    TESTCASE( ud->size == 2 );

    TESTCASE( ud->records[0].code_point == 0 );
    TESTCASE( strcmp( ud->records[0].name, "<control>" ) == 0 );
    TESTCASE( strcmp( ud->records[0].general_category, "Cc" ) == 0 );
    TESTCASE( ud->records[0].canonical_combining_class == 0 );
    TESTCASE( strcmp( ud->records[0].bidi_class, "BN" ) == 0 );

tests/include/pdclib/auxiliary/uctype/unicode_data.c view on Meta::CPAN

    TESTCASE( ! is_general_category( &( ud->records[1] ), "Foo" ) );

    TESTCASE( decomposition_contains( &( ud->records[1] ), "<compat>" ) );
    TESTCASE( ! decomposition_contains( &( ud->records[1] ), "Foo" ) );

    TESTCASE( ! towupper_differs( &( ud->records[0] ), 0 ) );
    TESTCASE( ! towlower_differs( &( ud->records[0] ), 0 ) );
    TESTCASE( towupper_differs( &( ud->records[1] ), 0x2170 ) );
    TESTCASE( ! towlower_differs( &( ud->records[1] ), 0x2170 ) );

    release_unicode_data( ud );

    return TEST_RESULTS;
}

#endif

tests/include/pdclib/auxiliary/uctype/unicode_data.h view on Meta::CPAN

/* unicode data

   This file is part of the Public Domain C Library (PDCLib).
   Permission is granted to use, modify, and / or redistribute at will.
*/

#ifndef UNICODE_DATA
#define UNICODE_DATA UNICODE_DATA

#include <stddef.h>

/* https://www.unicode.org/reports/tr44/#UnicodeData.txt */

/* We do not need all these fields at this point, but we read them anyway
   so we do not need to change much should the need arise later.
*/
/* One record (line) of UnicodeData.txt. */
struct unicode_record_t
{
    size_t code_point; /* parsed from the first field as a hexadecimal number */
    char * name; /* released with free() by release_unicode_data() */
    char general_category[ 3 ];
    int canonical_combining_class;
    char bidi_class[ 4 ];
    char * decomposition; /* may be NULL; released with free() by release_unicode_data() */
    int numeric_type;
    int numeric_digit;
    char * numeric_value; /* may be NULL; released with free() by release_unicode_data() */
    char bidi_mirrored;
    /*char * unicode_1_name;*/ /* Obsolete as of 6.2.0 */
    /*char * iso_comment;*/ /* Obsolete as of 5.2.0 */
    size_t simple_uppercase_mapping; /* zero is treated as "no mapping" by towupper_differs() */
    size_t simple_lowercase_mapping; /* zero is treated as "no mapping" by towlower_differs() */
    size_t simple_titlecase_mapping;
};

/* The complete data set as returned by read_unicode_data(). */
struct unicode_data_t
{
    size_t size; /* number of elements in records */
    struct unicode_record_t * records; /* array of size records */
};

/* The assumed field widths, for use with check_file().
   There is one entry per ';'-separated field of a UnicodeData.txt line,
   in file order, including the two obsolete fields that are not stored
   in struct unicode_record_t. The entries for general_category and
   bidi_class equal the sizes of the corresponding char arrays in that
   struct.
   NOTE(review): -1 presumably means "width is not checked" -- confirm
   against check_file() in text_utilities.c.
*/
static const int unicode_record_fields[] =
{
    -1, /* code_point */
    -1, /* name */
     3, /* general_category */
    -1, /* canonical_combining_class */
     4, /* bidi_class */
    -1, /* decomposition */
    -1, /* numeric_type */
    -1, /* numeric_digit */
    -1, /* numeric_value */
     2, /* bidi_mirrored */
    -1, /* unicode_1_name */
    -1, /* iso_comment */
    -1, /* simple_uppercase_mapping */
    -1, /* simple_lowercase_mapping */
    -1  /* simple_titlecase_mapping */
};

struct unicode_data_t * read_unicode_data( const char * filename );

int has_name( struct unicode_record_t * ur, const char * name );

int name_ends_with( struct unicode_record_t * ur, const char * name );

int is_general_category( struct unicode_record_t * ur, const char * category );

int decomposition_contains( struct unicode_record_t * ur, const char * substring );

int towupper_differs( struct unicode_record_t * ur, size_t codepoint );

int towlower_differs( struct unicode_record_t * ur, size_t codepoint );

void release_unicode_data( struct unicode_data_t * ud );

#endif

ucpp/README view on Meta::CPAN

to enforce C90 compatibility (it will, however, still recognize some
constructions that are not in plain C90).

ucpp also knows about several extensions to C99:

-- Assertions: this is an extension to the defined() operator, with
   its own namespace. Assertions seem to be used in several places,
   therefore ucpp knows about them. It is recommended to enable
   assertions by default on Solaris systems.
-- Unicode: the C99 standard specifies that extended characters, from
   the ISO-10646 charset (aka "unicode") can be used in identifiers
   with the notations \u and \U. ucpp also accepts (with the proper
   flag) the UTF-8 encoding in the source file for such characters.
-- #include_next directive: it works as a #include, but will look
   for files only in the directories specified in the include path
   after the one in which the current file was found. This is a GNU-ism
   that is useful for writing transparent wrappers around header files.

Assertions and unicode are activated by specific flags; the #include_next
support is always active.

The ucpp code itself should be compatible with any ISO-C90 compiler.
The cpp.c file is rather big (~ 64kB), it might confuse old 16-bit C
compilers; the macro.c file is somewhat large also (~ 47kB).

The evaluation of #if expressions is subject to some subtleties, see the
section "cross-compilation".

The lexer code makes no assumption about the source character set, but

( run in 0.397 second using v1.01-cache-2.11-cpan-88abd93f124 )