Geo-libpostal
view release on metacpan or search on metacpan
libpostal.xs view on Meta::CPAN
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include <string.h>
#include <libpostal/libpostal.h>
/*
 * Lazy-initialisation flags, one per libpostal subsystem.
 * Each is set to 1 once the matching libpostal_setup*() call has succeeded
 * and reset to 0 by _teardown() after the matching libpostal_teardown*() call.
 */
short LP_SETUP = 0;           /* core library: libpostal_setup() */
short LP_SETUP_LANGCLASS = 0; /* libpostal_setup_language_classifier() */
short LP_SETUP_PARSER = 0;    /* libpostal_setup_parser() */
MODULE = Geo::libpostal PACKAGE = Geo::libpostal PREFIX = lp_
PROTOTYPES: ENABLED
void
lp__teardown()
PPCODE:
if (LP_SETUP) {
libpostal_teardown();
LP_SETUP = 0;
}
if (LP_SETUP_LANGCLASS) {
libpostal_teardown_language_classifier();
LP_SETUP_LANGCLASS = 0;
}
if (LP_SETUP_PARSER) {
libpostal_teardown_parser();
LP_SETUP_PARSER = 0;
}
/* return undef */
EXTEND(SP, 1);
PUSHs(sv_newmortal());
void
lp_expand_address(address, ...)
SV *address
PREINIT:
char *src, *option_name;
size_t src_len, option_len, i, j, num_expansions, num_langs, exp_len, lang_len, components;
AV *languages_av;
SV **lang;
char **languages = NULL;
PPCODE:
/* lazy load libpostal */
if (!LP_SETUP) {
if (!libpostal_setup()) {
croak("libpostal_setup() failed");
}
LP_SETUP = 1;
}
if (!LP_SETUP_LANGCLASS) {
if(!libpostal_setup_language_classifier()) {
croak("libpostal_setup_language_classifier failed");
}
LP_SETUP_LANGCLASS = 1;
}
/* call fetch() if a tied variable to populate the sv */
SvGETMAGIC(address);
/* check for undef */
if (!SvOK(address) || !SvCUR(address))
{
croak("expand_address() requires a scalar argument to expand!");
}
/* get the string buffer (get-magic was already run above) and populate src_len */
src = SvPV_nomg(address, src_len);
libpostal_normalize_options_t options = libpostal_get_default_options();
/* parse optional args */
if (((items - 1) % 2) != 0)
croak("Odd number of options in call to expand_address()");
for (i = 1; i < items; i += 2) {
if (!SvOK(ST(i)) || !SvCUR(ST(i)))
croak("expand_address() option names cannot be empty");
SvGETMAGIC(ST(i));
option_name = SvPV_nomg(ST(i), option_len);
SvGETMAGIC(ST(i+1));
/* process arrayref of lang codes option */
if (!strncmp("languages", option_name, option_len)) {
/* check it's an arrayref */
if (!SvROK(ST(i+1)) || SvTYPE(SvRV(ST(i+1))) != SVt_PVAV)
croak("expand_address() languages option must be an arrayref");
/* dereference the arrayref */
languages_av = (AV*)SvRV(ST(i+1));
/* av_len returns the highest index, not the length */
num_langs = av_len(languages_av) + 1;
languages = malloc(sizeof(char *) * num_langs);
/* loop through the array assigning the languages */
for (j = 0; j < num_langs; j++) {
lang = av_fetch(languages_av, j, 0);
/* must check for null pointers */
libpostal.xs view on Meta::CPAN
else if (!strncmp("delete_numeric_hyphens", option_name, option_len)) {
options.delete_numeric_hyphens = SvTRUE(ST(i+1));
}
else if (!strncmp("split_alpha_from_numeric", option_name, option_len)) {
options.split_alpha_from_numeric = SvTRUE(ST(i+1));
}
else if (!strncmp("replace_word_hyphens", option_name, option_len)) {
options.replace_word_hyphens = SvTRUE(ST(i+1));
}
else if (!strncmp("delete_word_hyphens", option_name, option_len)) {
options.delete_word_hyphens = SvTRUE(ST(i+1));
}
else if (!strncmp("delete_final_periods", option_name, option_len)) {
options.delete_final_periods = SvTRUE(ST(i+1));
}
else if (!strncmp("delete_acronym_periods", option_name, option_len)) {
options.delete_acronym_periods = SvTRUE(ST(i+1));
}
else if (!strncmp("drop_english_possessives", option_name, option_len)) {
options.drop_english_possessives = SvTRUE(ST(i+1));
}
else if (!strncmp("delete_apostrophes", option_name, option_len)) {
options.delete_apostrophes = SvTRUE(ST(i+1));
}
else if (!strncmp("expand_numex", option_name, option_len)) {
options.expand_numex = SvTRUE(ST(i+1));
}
else if (!strncmp("roman_numerals", option_name, option_len)) {
options.roman_numerals = SvTRUE(ST(i+1));
}
else {
croak("Unrecognised parameter: '%"SVf"'", ST(i));
}
}
char **expansions = libpostal_expand_address(src, options, &num_expansions);
/* extend stack pointer with num of return values */
EXTEND(SP, num_expansions);
/* push return values onto stack pointer */
for (i = 0; i < num_expansions; i++) {
exp_len = strlen(expansions[i]);
PUSHs( sv_2mortal(newSVpvn(expansions[i], exp_len)) );
}
/* Free data */
if (languages != NULL) {
for (i = 0; i < num_langs; i++) {
free(languages[i]);
}
free(languages);
}
libpostal_expansion_array_destroy(expansions, num_expansions);
void
lp_parse_address(address, ...)
SV *address
PREINIT:
char *src, *option_name;
size_t src_len, option_len, i, label_len, component_len;
PPCODE:
/* lazy load libpostal */
if (!LP_SETUP) {
if (!libpostal_setup()) {
croak("libpostal_setup() failed");
}
LP_SETUP = 1;
}
if (!LP_SETUP_PARSER) {
if(!libpostal_setup_parser()) {
croak("libpostal_setup_parser() failed");
}
LP_SETUP_PARSER = 1;
}
/* call fetch() if a tied variable to populate the sv */
SvGETMAGIC(address);
/* check for undef */
if (!SvOK(address) || !SvCUR(address))
{
croak("parse_address() requires a scalar argument to parse!");
}
/* get the string buffer (get-magic was already run above) and populate src_len */
src = SvPV_nomg(address, src_len);
libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
/* parse optional args
* N.B. These are ignored by libpostal
* */
if (((items - 1) % 2) != 0)
croak("Odd number of options in call to parse_address()");
for (i = 1; i < items; i += 2) {
if (!SvOK(ST(i)))
croak("parse_address() option names cannot be undef");
SvGETMAGIC(ST(i));
option_name = SvPV_nomg(ST(i), option_len);
SvGETMAGIC(ST(i+1));
if (option_len && !strncmp("language", option_name, option_len)) {
options.language = SvPV_nomg(ST(i), option_len);
}
else if (option_len && !strncmp("country", option_name, option_len)) {
options.country = SvPV_nomg(ST(i), option_len);
}
else {
croak("Unrecognised parameter: '%"SVf"'", ST(i));
}
}
libpostal_address_parser_response_t *parsed = libpostal_parse_address(src, options);
/* extend stack pointer with num of return values */
EXTEND(SP, parsed->num_components * 2);
/* push return values onto stack pointer */
( run in 0.955 second using v1.01-cache-2.11-cpan-5511b514fd6 )