Lingua-Identify-CLD2
view release on metacpan or search on metacpan
xsp/CLD2.xsp view on Meta::CPAN
#include <compact_lang_det.h>
#include "myinit.h"
#include "myconversions.h"
%module{Lingua::Identify::CLD2};
%package{Lingua::Identify::CLD2};
%{
void
ExtDetectLanguage(buffer, is_plain_text, flags, content_language_hint, tld_hint, encoding_hint, language_hint, returnVectors)
SV *buffer
bool is_plain_text
int flags
char* content_language_hint
char* tld_hint
int encoding_hint
SV* language_hint
bool returnVectors
PPCODE:
char *buffer_str;
STRLEN buffer_length;
buffer_str = SvPVutf8(buffer, buffer_length);
CLDHints cld_hints_struct;
cld_hints_struct.content_language_hint = content_language_hint && content_language_hint[0] ? content_language_hint : NULL;
cld_hints_struct.tld_hint = tld_hint && tld_hint[0] ? tld_hint : NULL;
cld_hints_struct.encoding_hint = encoding_hint;
cld_hints_struct.language_hint = scalar_to_language(language_hint);
Language language3[3] = {UNKNOWN_LANGUAGE, UNKNOWN_LANGUAGE, UNKNOWN_LANGUAGE};
int percent3[3] = {0, 0, 0};
double normalized_score3[3] = {0.0, 0.0, 0.0};
ResultChunkVector resultchunkvector;
ResultChunkVector* p_resultchunkvector = returnVectors ? &resultchunkvector : 0;
int text_bytes = 0;
bool is_reliable = 0;
int valid_prefix_bytes = 0;
Language language =
ExtDetectLanguageSummaryCheckUTF8(buffer_str, (int)buffer_length, is_plain_text, &cld_hints_struct, flags,
language3, percent3, normalized_score3, p_resultchunkvector,
&text_bytes, &is_reliable, &valid_prefix_bytes);
if (valid_prefix_bytes < buffer_length) {
croak("input contains invalid UTF-8 around byte %d of %d", valid_prefix_bytes, buffer_length);
}
HV *hv = newHV();
SV *rv = sv_2mortal(newRV_noinc((SV *)hv));
hv_stores(hv, "language_name", newSVpv(CLD2::LanguageName(language), 0));
hv_stores(hv, "language_code", newSVpv(CLD2::LanguageCode(language), 0));
if (returnVectors) {
AV *reschunk_av = resultchunk_vector_to_array(aTHX_ resultchunkvector);
hv_stores(hv, "resultchunkvector", newRV_noinc((SV *)reschunk_av));
}
hv_stores(hv, "text_bytes", newSViv(text_bytes));
hv_stores(hv, "is_reliable", newSViv((int)is_reliable));
hv_stores(hv, "languages", newRV_noinc((SV*)languages_to_array(language3, percent3, normalized_score3)));
XPUSHs(rv);
char *
LanguageName(int language_id)
ALIAS:
LanguageName = 0
LanguageCode = 1
LanguageDeclaredName = 2
CODE:
switch (ix) {
case 0:
RETVAL = (char *)CLD2::LanguageName((CLD2::Language)language_id);
break;
case 1:
RETVAL = (char *)CLD2::LanguageCode((CLD2::Language)language_id);
( run in 0.969 second using v1.01-cache-2.11-cpan-71847e10f99 )