Algorithm-AhoCorasick-XS

 view release on metacpan or  search on metacpan

Matcher.hpp  view on Meta::CPAN

#include "Trie.hpp"
#include <vector>
#include <string>

using std::vector;
using std::string;

namespace AhoCorasick {
    struct match {
        string keyword;
        size_t start;
        size_t end;
    };

    class Matcher {
        vector<string> words;
        Trie *root;

        public:

        Matcher(const vector<string> & keywords);

README.md  view on Meta::CPAN

# NAME

Algorithm::AhoCorasick::XS - fast Aho-Corasick multiple string matcher

# SYNOPSIS

    # *** EARLY RELEASE - API SUBJECT TO CHANGE ***

    my $ac = Algorithm::AhoCorasick::XS->new([qw(he she hers his)]);
    for my $match ($ac->match_details("ahishers")) {
        printf "Word %s appears from %d to %d\n", $match->{word}, $match->{start}, $match->{end}; 
    }
    # Outputs:
    # Word his appears from 1 to 3
    # Word she appears from 3 to 5
    # Word he appears from 4 to 5
    # Word hers appears from 4 to 7

    # I only care about the words matched
    my @words = $ac->matches($input);    # or unique_matches to remove duplicates

README.md  view on Meta::CPAN

    As above but runs `uniq` on the list for you.

- first\_match ( INPUT )

    Returns the first match only (or undef if none). This is efficient - the matcher
    will stop once it encounters the first match, and the rest of the string will be
    ignored.

- match\_details ( INPUT )

    Returns a list of hashrefs, containing the keys `word`, `start` and `end`.
    These correspond to an occurence of a substring - the word, start and end offset
    within the string.

# ENCODING SUPPORT

The matcher runs at the byte level, so you can use any encoding you like. If you
want to match strings regardless of encoding, I recommend that you encode everything
into UTF-8 and apply NFC normalization (or perhaps NFD).

## Passing Unicode strings

If you pass Unicode strings to the matcher, they will be interpreted as a sequence
of UTF-8 bytes. This means the output of `matches`, `match_details` etc. will also
be in terms of bytes.

You can simply call ` decode('UTF-8', ...) ` on the substrings to get their
Unicode versions. The offsets will be in bytes though; converting them to character
offsets in the Unicode string is a little more tricky:

    use Encode qw(decode);
    my $unicode_start = length(decode('UTF-8', bytes::substr($string, 0, $start)));
    my $unicode_end   = $start + length(decode('UTF-8', $word)) - 1;

This will be handled for you in a future version.

# CAVEATS

This is an early release and has not been tested thoroughly, use at your own risk.
The API is subject to change until version 1.0.

If your keyword list contains duplicates, you will get duplicate matches.

lib/Algorithm/AhoCorasick/XS.pm  view on Meta::CPAN

=head1 NAME

Algorithm::AhoCorasick::XS - fast Aho-Corasick multiple string matcher

=head1 SYNOPSIS

 # *** EARLY RELEASE - API SUBJECT TO CHANGE ***

 my $ac = Algorithm::AhoCorasick::XS->new([qw(he she hers his)]);
 for my $match ($ac->match_details("ahishers")) {
     printf "Word %s appears from %d to %d\n", $match->{word}, $match->{start}, $match->{end}; 
 }
 # Outputs:
 # Word his appears from 1 to 3
 # Word she appears from 3 to 5
 # Word he appears from 4 to 5
 # Word hers appears from 4 to 7

 # I only care about the words matched
 my @words = $ac->matches($input);    # or unique_matches to remove duplicates

lib/Algorithm/AhoCorasick/XS.pm  view on Meta::CPAN

As above but runs C<uniq> on the list for you.

=item first_match ( INPUT )

Returns the first match only (or undef if none). This is efficient - the matcher
will stop once it encounters the first match, and the rest of the string will be
ignored.

=item match_details ( INPUT )

Returns a list of hashrefs, containing the keys C<word>, C<start> and C<end>.
These correspond to an occurence of a substring - the word, start and end offset
within the string.

=back

=head1 ENCODING SUPPORT

The matcher runs at the byte level, so you can use any encoding you like. If you
want to match strings regardless of encoding, I recommend that you encode everything
into UTF-8 and apply NFC normalization (or perhaps NFD).

lib/Algorithm/AhoCorasick/XS.pm  view on Meta::CPAN


If you pass Unicode strings to the matcher, they will be interpreted as a sequence
of UTF-8 bytes. This means the output of C<matches>, C<match_details> etc. will also
be in terms of bytes.

You can simply call C< decode('UTF-8', ...) > on the substrings to get their
Unicode versions. The offsets will be in bytes though; converting them to character
offsets in the Unicode string is a little more tricky:

 use Encode qw(decode);
 my $unicode_start = length(decode('UTF-8', bytes::substr($string, 0, $start)));
 my $unicode_end   = $start + length(decode('UTF-8', $word)) - 1;

This will be handled for you in a future version.

=head1 CAVEATS

This is an early release and has not been tested thoroughly, use at your own risk.
The API is subject to change until version 1.0.

If your keyword list contains duplicates, you will get duplicate matches.

ppport.h  view on Meta::CPAN

_invlist_len|||n
_invlist_populate_swatch|||n
_invlist_search|||n
_invlist_subtract|||
_invlist_union_maybe_complement_2nd|||
_invlist_union|||
_is_cur_LC_category_utf8|||
_is_in_locale_category||5.021001|
_is_uni_FOO||5.017008|
_is_uni_perl_idcont||5.017008|
_is_uni_perl_idstart||5.017007|
_is_utf8_FOO||5.017008|
_is_utf8_char_slow||5.021001|n
_is_utf8_idcont||5.021001|
_is_utf8_idstart||5.021001|
_is_utf8_mark||5.017008|
_is_utf8_perl_idcont||5.017008|
_is_utf8_perl_idstart||5.017007|
_is_utf8_xidcont||5.021001|
_is_utf8_xidstart||5.021001|
_load_PL_utf8_foldclosures|||
_make_exactf_invlist|||
_new_invlist_C_array|||
_new_invlist|||
_pMY_CXT|5.007003||p
_setlocale_debug_string|||n
_setup_canned_invlist|||
_swash_inversion_hash|||
_swash_to_invlist|||
_to_fold_latin1|||

ppport.h  view on Meta::CPAN

av_unshift|||
ax|||n
backup_one_LB|||
backup_one_SB|||
backup_one_WB|||
bad_type_gv|||
bad_type_pv|||
bind_match|||
block_end||5.004000|
block_gimme||5.004000|
block_start||5.004000|
blockhook_register||5.013003|
boolSV|5.004000||p
boot_core_PerlIO|||
boot_core_UNIVERSAL|||
boot_core_mro|||
bytes_cmp_utf8||5.013007|
bytes_from_utf8||5.007001|
bytes_to_utf8||5.006001|
cBOOL|5.013000||p
call_argv|5.006000||p

ppport.h  view on Meta::CPAN

dXSTARG|5.006000||p
deb_curcv|||
deb_nocontext|||vn
deb_stack_all|||
deb_stack_n|||
debop||5.005000|
debprofdump||5.005000|
debprof|||
debstackptrs||5.007003|
debstack||5.007003|
debug_start_match|||
deb||5.007003|v
defelem_target|||
del_sv|||
delete_eval_scope|||
delimcpy||5.004000|n
deprecate_commaless_var_list|||
despatch_signals||5.007001|
destroy_matcher|||
die_nocontext|||vn
die_sv||5.013001|

ppport.h  view on Meta::CPAN

leave_adjust_stacks||5.023008|
leave_scope|||
lex_bufutf8||5.011002|
lex_discard_to||5.011002|
lex_grow_linestr||5.011002|
lex_next_chunk||5.011002|
lex_peek_unichar||5.011002|
lex_read_space||5.011002|
lex_read_to||5.011002|
lex_read_unichar||5.011002|
lex_start||5.009005|
lex_stuff_pvn||5.011002|
lex_stuff_pvs||5.013005|
lex_stuff_pv||5.013006|
lex_stuff_sv||5.011002|
lex_unstuff||5.011002|
listkids|||
list|||
load_module_nocontext|||vn
load_module|5.006000||pv
localize|||

ppport.h  view on Meta::CPAN

package|||
packlist||5.008001|
pad_add_anon||5.008001|
pad_add_name_pvn||5.015001|
pad_add_name_pvs||5.015001|
pad_add_name_pv||5.015001|
pad_add_name_sv||5.015001|
pad_add_weakref|||
pad_alloc_name|||
pad_alloc|||
pad_block_start|||
pad_check_dup|||
pad_compname_type||5.009003|
pad_findlex|||
pad_findmy_pvn||5.015001|
pad_findmy_pvs||5.015001|
pad_findmy_pv||5.015001|
pad_findmy_sv||5.015001|
pad_fixup_inner_anons|||
pad_free|||
pad_leavemy|||

ppport.h  view on Meta::CPAN

pv_uni_display||5.007003|
qerror|||
qsortsvu|||
quadmath_format_needed|||n
quadmath_format_single|||n
re_compile||5.009005|
re_croak2|||
re_dup_guts|||
re_exec_indentf|||v
re_indentf|||v
re_intuit_start||5.019001|
re_intuit_string||5.006000|
re_op_compile|||
re_printf|||v
realloc||5.007002|n
reentrant_free||5.024000|
reentrant_init||5.024000|
reentrant_retry||5.024000|vn
reentrant_size||5.024000|
ref_array_or_hash|||
refcounted_he_chain_2hv|||

ppport.h  view on Meta::CPAN

ssc_clear_locale|||n
ssc_cp_and|||
ssc_finalize|||
ssc_init|||
ssc_intersection|||
ssc_is_anything|||n
ssc_is_cp_posixl_init|||n
ssc_or|||
ssc_union|||
stack_grow|||
start_glob|||
start_subparse||5.004000|
stdize_locale|||
strEQ|||
strGE|||
strGT|||
strLE|||
strLT|||
strNE|||
str_to_version||5.006000|
strip_return|||
strnEQ|||
strnNE|||
study_chunk|||
sub_crush_depth|||
sublex_done|||
sublex_push|||
sublex_start|||
sv_2bool_flags||5.013006|
sv_2bool|||
sv_2cv|||
sv_2io|||
sv_2iuv_common|||
sv_2iuv_non_preserve|||
sv_2iv_flags||5.009001|
sv_2iv|||
sv_2mortal|||
sv_2num|||

ppport.h  view on Meta::CPAN

if (exists $opt{'api-info'}) {
  my $f;
  my $count = 0;
  my $match = $opt{'api-info'} =~ m!^/(.*)/$! ? $1 : "^\Q$opt{'api-info'}\E\$";
  for $f (sort { lc $a cmp lc $b } keys %API) {
    next unless $f =~ /$match/;
    print "\n=== $f ===\n\n";
    my $info = 0;
    if ($API{$f}{base} || $API{$f}{todo}) {
      my $base = format_version($API{$f}{base} || $API{$f}{todo});
      print "Supported at least starting from perl-$base.\n";
      $info++;
    }
    if ($API{$f}{provided}) {
      my $todo = $API{$f}{todo} ? format_version($API{$f}{todo}) : "5.003";
      print "Support by $ppport provided back to perl-$todo.\n";
      print "Support needs to be explicitly requested by NEED_$f.\n" if exists $need{$f};
      print "Depends on: ", join(', ', @{$depends{$f}}), ".\n" if exists $depends{$f};
      print "\n$hints{$f}" if exists $hints{$f};
      print "\nWARNING:\n$warnings{$f}" if exists $warnings{$f};
      $info++;

ppport.h  view on Meta::CPAN

            imop = append_elem(OP_LIST, imop, newSVOP(OP_CONST, 0, sv));
            sv = va_arg(*args, SV*);
        }
    }
    {
        const line_t ocopline = PL_copline;
        COP * const ocurcop = PL_curcop;
        const int oexpect = PL_expect;

#if (PERL_BCDVERSION >= 0x5004000)
        utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(FALSE, 0),
                veop, modname, imop);
#elif (PERL_BCDVERSION > 0x5003000)
        utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(),
                veop, modname, imop);
#else
        utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(),
                modname, imop);
#endif
        PL_expect = oexpect;
        PL_copline = ocopline;
        PL_curcop = ocurcop;
    }
}

#endif
#endif

ppport.h  view on Meta::CPAN

#endif
#define load_module DPPP_(my_load_module)
#define Perl_load_module DPPP_(my_load_module)

#if defined(NEED_load_module) || defined(NEED_load_module_GLOBAL)

void
DPPP_(my_load_module)(U32 flags, SV *name, SV *ver, ...)
{
    va_list args;
    va_start(args, ver);
    vload_module(flags, name, ver, &args);
    va_end(args);
}

#endif
#endif
#ifndef newRV_inc
#  define newRV_inc(sv)                  newRV(sv)   /* Replace */
#endif

ppport.h  view on Meta::CPAN

  return rv;
}
#endif
#endif

/* Hint: newCONSTSUB
 * Returns a CV* as of perl-5.7.1. This return value is not supported
 * by Devel::PPPort.
 */

/* newCONSTSUB from IO.xs is in the core starting with 5.004_63 */
#if (PERL_BCDVERSION < 0x5004063) && (PERL_BCDVERSION != 0x5004005)
#if defined(NEED_newCONSTSUB)
static void DPPP_(my_newCONSTSUB)(HV *stash, const char *name, SV *sv);
static
#else
extern void DPPP_(my_newCONSTSUB)(HV *stash, const char *name, SV *sv);
#endif

#ifdef newCONSTSUB
#  undef newCONSTSUB

ppport.h  view on Meta::CPAN

        line_t oldline = PL_curcop->cop_line;
        PL_curcop->cop_line = D_PPP_PL_copline;

        PL_hints &= ~HINT_BLOCK_SCOPE;
        if (stash)
                PL_curstash = PL_curcop->cop_stash = stash;

        newSUB(

#if   (PERL_BCDVERSION < 0x5003022)
                start_subparse(),
#elif (PERL_BCDVERSION == 0x5003022)
                start_subparse(0),
#else  /* 5.003_23  onwards */
                start_subparse(FALSE, 0),
#endif

                newSVOP(OP_CONST, 0, newSVpv((char *) name, 0)),
                newSVOP(OP_CONST, 0, &PL_sv_no),   /* SvPV(&PL_sv_no) == "" -- GMB */
                newSTATEOP(0, Nullch, newSVOP(OP_CONST, 0, sv))
        );

        PL_hints = oldhints;
        PL_curcop->cop_stash = old_cop_stash;
        PL_curstash = old_curstash;

ppport.h  view on Meta::CPAN

#endif

#define Perl_sv_catpvf_mg DPPP_(my_sv_catpvf_mg)

#if defined(NEED_sv_catpvf_mg) || defined(NEED_sv_catpvf_mg_GLOBAL)

void
DPPP_(my_sv_catpvf_mg)(pTHX_ SV *sv, const char *pat, ...)
{
  va_list args;
  va_start(args, pat);
  sv_vcatpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*));
  SvSETMAGIC(sv);
  va_end(args);
}

#endif
#endif

#ifdef PERL_IMPLICIT_CONTEXT
#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_catpvf_mg_nocontext)

ppport.h  view on Meta::CPAN

#define sv_catpvf_mg_nocontext DPPP_(my_sv_catpvf_mg_nocontext)
#define Perl_sv_catpvf_mg_nocontext DPPP_(my_sv_catpvf_mg_nocontext)

#if defined(NEED_sv_catpvf_mg_nocontext) || defined(NEED_sv_catpvf_mg_nocontext_GLOBAL)

void
DPPP_(my_sv_catpvf_mg_nocontext)(SV *sv, const char *pat, ...)
{
  dTHX;
  va_list args;
  va_start(args, pat);
  sv_vcatpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*));
  SvSETMAGIC(sv);
  va_end(args);
}

#endif
#endif
#endif

/* sv_catpvf_mg depends on sv_catpvf_mg_nocontext */

ppport.h  view on Meta::CPAN

#endif

#define Perl_sv_setpvf_mg DPPP_(my_sv_setpvf_mg)

#if defined(NEED_sv_setpvf_mg) || defined(NEED_sv_setpvf_mg_GLOBAL)

void
DPPP_(my_sv_setpvf_mg)(pTHX_ SV *sv, const char *pat, ...)
{
  va_list args;
  va_start(args, pat);
  sv_vsetpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*));
  SvSETMAGIC(sv);
  va_end(args);
}

#endif
#endif

#ifdef PERL_IMPLICIT_CONTEXT
#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_setpvf_mg_nocontext)

ppport.h  view on Meta::CPAN

#define sv_setpvf_mg_nocontext DPPP_(my_sv_setpvf_mg_nocontext)
#define Perl_sv_setpvf_mg_nocontext DPPP_(my_sv_setpvf_mg_nocontext)

#if defined(NEED_sv_setpvf_mg_nocontext) || defined(NEED_sv_setpvf_mg_nocontext_GLOBAL)

void
DPPP_(my_sv_setpvf_mg_nocontext)(SV *sv, const char *pat, ...)
{
  dTHX;
  va_list args;
  va_start(args, pat);
  sv_vsetpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*));
  SvSETMAGIC(sv);
  va_end(args);
}

#endif
#endif
#endif

/* sv_setpvf_mg depends on sv_setpvf_mg_nocontext */

ppport.h  view on Meta::CPAN

#if defined(NEED_warner) || defined(NEED_warner_GLOBAL)

void
DPPP_(my_warner)(U32 err, const char *pat, ...)
{
  SV *sv;
  va_list args;

  PERL_UNUSED_ARG(err);

  va_start(args, pat);
  sv = vnewSVpvf(pat, &args);
  va_end(args);
  sv_2mortal(sv);
  warn("%s", SvPV_nolen(sv));
}

#define warner  Perl_warner

#define Perl_warner_nocontext  Perl_warner

ppport.h  view on Meta::CPAN

#  define CopSTASH_eq(c,hv)              (CopSTASH(c) == (hv))
#endif

#endif /* USE_ITHREADS */

#if (PERL_BCDVERSION >= 0x5006000)
#ifndef caller_cx

# if defined(NEED_caller_cx) || defined(NEED_caller_cx_GLOBAL)
static I32
DPPP_dopoptosub_at(const PERL_CONTEXT *cxstk, I32 startingblock)
{
    I32 i;

    for (i = startingblock; i >= 0; i--) {
	register const PERL_CONTEXT * const cx = &cxstk[i];
	switch (CxTYPE(cx)) {
	default:
	    continue;
	case CXt_EVAL:
	case CXt_SUB:
	case CXt_FORMAT:
	    return i;
	}
    }

ppport.h  view on Meta::CPAN

#endif

/*
 * The grok_* routines have been modified to use warn() instead of
 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
 * which is why the stack variable has been renamed to 'xdigit'.
 */

#ifndef grok_bin
#if defined(NEED_grok_bin)
static UV DPPP_(my_grok_bin)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result);
static
#else
extern UV DPPP_(my_grok_bin)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result);
#endif

#ifdef grok_bin
#  undef grok_bin
#endif
#define grok_bin(a,b,c,d) DPPP_(my_grok_bin)(aTHX_ a,b,c,d)
#define Perl_grok_bin DPPP_(my_grok_bin)

#if defined(NEED_grok_bin) || defined(NEED_grok_bin_GLOBAL)
UV
DPPP_(my_grok_bin)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_2 = UV_MAX / 2;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;

    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
        /* strip off leading b or 0b.

ppport.h  view on Meta::CPAN

        break;
    }

    if (   ( overflowed && value_nv > 4294967295.0)
#if UVSIZE > 4
        || (!overflowed && value > 0xffffffff  )
#endif
        ) {
        warn("Binary number > 0b11111111111111111111111111111111 non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif
#endif

#ifndef grok_hex
#if defined(NEED_grok_hex)
static UV DPPP_(my_grok_hex)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result);
static
#else
extern UV DPPP_(my_grok_hex)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result);
#endif

#ifdef grok_hex
#  undef grok_hex
#endif
#define grok_hex(a,b,c,d) DPPP_(my_grok_hex)(aTHX_ a,b,c,d)
#define Perl_grok_hex DPPP_(my_grok_hex)

#if defined(NEED_grok_hex) || defined(NEED_grok_hex_GLOBAL)
UV
DPPP_(my_grok_hex)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_16 = UV_MAX / 16;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;
    const char *xdigit;

    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {

ppport.h  view on Meta::CPAN

        break;
    }

    if (   ( overflowed && value_nv > 4294967295.0)
#if UVSIZE > 4
        || (!overflowed && value > 0xffffffff  )
#endif
        ) {
        warn("Hexadecimal number > 0xffffffff non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif
#endif

#ifndef grok_oct
#if defined(NEED_grok_oct)
static UV DPPP_(my_grok_oct)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result);
static
#else
extern UV DPPP_(my_grok_oct)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result);
#endif

#ifdef grok_oct
#  undef grok_oct
#endif
#define grok_oct(a,b,c,d) DPPP_(my_grok_oct)(aTHX_ a,b,c,d)
#define Perl_grok_oct DPPP_(my_grok_oct)

#if defined(NEED_grok_oct) || defined(NEED_grok_oct_GLOBAL)
UV
DPPP_(my_grok_oct)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_8 = UV_MAX / 8;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;

    for (; len-- && *s; s++) {
         /* gcc 2.95 optimiser not smart enough to figure that this subtraction

ppport.h  view on Meta::CPAN

        break;
    }

    if (   ( overflowed && value_nv > 4294967295.0)
#if UVSIZE > 4
        || (!overflowed && value > 0xffffffff  )
#endif
        ) {
        warn("Octal number > 037777777777 non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif

ppport.h  view on Meta::CPAN

#define Perl_my_snprintf DPPP_(my_my_snprintf)

#if defined(NEED_my_snprintf) || defined(NEED_my_snprintf_GLOBAL)

int
DPPP_(my_my_snprintf)(char *buffer, const Size_t len, const char *format, ...)
{
    dTHX;
    int retval;
    va_list ap;
    va_start(ap, format);
#ifdef HAS_VSNPRINTF
    retval = vsnprintf(buffer, len, format, ap);
#else
    retval = vsprintf(buffer, format, ap);
#endif
    va_end(ap);
    if (retval < 0 || (len > 0 && (Size_t)retval >= len))
        Perl_croak(aTHX_ "panic: my_snprintf buffer overflow");
    return retval;
}

ppport.h  view on Meta::CPAN


#define my_sprintf DPPP_(my_my_sprintf)
#define Perl_my_sprintf DPPP_(my_my_sprintf)

#if defined(NEED_my_sprintf) || defined(NEED_my_sprintf_GLOBAL)

int
DPPP_(my_my_sprintf)(char *buffer, const char* pat, ...)
{
    va_list args;
    va_start(args, pat);
    vsprintf(buffer, pat, args);
    va_end(args);
    return strlen(buffer);
}

#endif
#endif

#ifdef NO_XSLOCKS
#  ifdef dJMPENV

ppport.h  view on Meta::CPAN

    if (escaped != NULL)
        *escaped= pv - str;
    return SvPVX(dsv);
}

#endif
#endif

#ifndef pv_pretty
#if defined(NEED_pv_pretty)
static char * DPPP_(my_pv_pretty)(pTHX_ SV * dsv, char const * const str, const STRLEN count, const STRLEN max, char const * const start_color, char const * const end_color, const U32 flags);
static
#else
extern char * DPPP_(my_pv_pretty)(pTHX_ SV * dsv, char const * const str, const STRLEN count, const STRLEN max, char const * const start_color, char const * const end_color, const U32 flags);
#endif

#ifdef pv_pretty
#  undef pv_pretty
#endif
#define pv_pretty(a,b,c,d,e,f,g) DPPP_(my_pv_pretty)(aTHX_ a,b,c,d,e,f,g)
#define Perl_pv_pretty DPPP_(my_pv_pretty)

#if defined(NEED_pv_pretty) || defined(NEED_pv_pretty_GLOBAL)

char *
DPPP_(my_pv_pretty)(pTHX_ SV *dsv, char const * const str, const STRLEN count,
  const STRLEN max, char const * const start_color, char const * const end_color,
  const U32 flags)
{
    const U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '%';
    STRLEN escaped;

    if (!(flags & PERL_PV_PRETTY_NOCLEAR))
        sv_setpvs(dsv, "");

    if (dq == '"')
        sv_catpvs(dsv, "\"");
    else if (flags & PERL_PV_PRETTY_LTGT)
        sv_catpvs(dsv, "<");

    if (start_color != NULL)
        sv_catpv(dsv, D_PPP_CONSTPV_ARG(start_color));

    pv_escape(dsv, str, count, max, &escaped, flags | PERL_PV_ESCAPE_NOCLEAR);

    if (end_color != NULL)
        sv_catpv(dsv, D_PPP_CONSTPV_ARG(end_color));

    if (dq == '"')
        sv_catpvs(dsv, "\"");
    else if (flags & PERL_PV_PRETTY_LTGT)
        sv_catpvs(dsv, ">");

t/details.t  view on Meta::CPAN

use strict;
use warnings;
use Test::More 0.96;
use Algorithm::AhoCorasick::XS;

subtest details => sub {
    my $ac = Algorithm::AhoCorasick::XS->new([qw(he she hers his)]);
    my @details = $ac->match_details("ahishers");

    is( $details[0]{word}, "his" );
    is( $details[0]{start}, 1 );
    is( $details[0]{end}, 3 );

    is( $details[1]{word}, "she" );
    is( $details[1]{start}, 3 );
    is( $details[1]{end}, 5 );

    is( $details[2]{word}, "he" );
    is( $details[2]{start}, 4 );
    is( $details[2]{end}, 5 );

    is( $details[3]{word}, "hers" );
    is( $details[3]{start}, 4 );
    is( $details[3]{end}, 7 );
};

subtest empty => sub {
    my $ac = Algorithm::AhoCorasick::XS->new([qw(he she hers his)]);
    my @details = $ac->match_details("xyxyxyxyxyxyxy");
    is_deeply( \@details, [] );
};

done_testing;

t/utf8.t  view on Meta::CPAN

my $matcher = Algorithm::AhoCorasick::XS->new(\@words);
my @found = $matcher->matches($string);

is( scalar @found, 2 );
is( decode('UTF-8', $found[0]), "明日" );
is( decode('UTF-8', $found[1]), "なれる" );

my @details = $matcher->match_details($string);
is( scalar @details, 2 );
for my $d (@details) {
    is( $d->{word}, bytes::substr($string, $d->{start}, $d->{end} - $d->{start} + 1) );
}

done_testing;

typemap  view on Meta::CPAN

    EXTEND(SP, extend_size);
    for (ix_$var = 0; ix_$var < size_$var; ix_$var++) {
        // Build a hashref from the match object
        const AhoCorasick::match &m = ${var}[ix_$var];
        HV *hv = newHV();

        STRLEN klen = m.keyword.length();

        // 7=strlen("keyword"), 0=tell hv_store to calculate the hash
        hv_store(hv, "word", 4, newSVpv(m.keyword.c_str(), klen), 0);
        hv_store(hv, "start", 5, newSViv(m.start), 0);
        hv_store(hv, "end", 3, newSViv(m.end), 0);

        ST(ix_$var) = newRV_noinc((SV *)hv);
    }
    XSRETURN(size_$var);
  }



( run in 1.053 second using v1.01-cache-2.11-cpan-0d8aa00de5b )