HTML5-DOM
view release on metacpan or search on metacpan
=item *
L<detectBomAndCut|/detectBomAndCut>
=item *
L<detect|/detect>
=back
On success, returns an array with the encoding id and the new text without the BOM.
On failure, the encoding id is equal to HTML5::DOM::Encoding->NOT_DETERMINED.
my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectAuto("ололо");
my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
print $encoding; # UTF-8
=head3 detect
my $encoding_id = HTML5::DOM::Encoding::detect($text, $max_length = 0);
my $encoding_id = HTML5::DOM::Encoding::detectByPrescanStream('
<meta http-equiv="content-type" content="text/html; charset=windows-1251">
');
my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
print $encoding; # WINDOWS-1251
=head3 detectBomAndCut
my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut($text, $max_length = 0);
Returns an array with the encoding id and the new text without the BOM.
On failure, the encoding id is equal to HTML5::DOM::Encoding->NOT_DETERMINED.
my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut("\xEF\xBB\xBFололо");
my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
print $encoding; # UTF-8
print $new_text; # ололо
=head1 NAMESPACES
See L<detectByPrescanStream|/detectByPrescanStream>.
=head4 encoding_prescan_limit
Limit string length to determine encoding by C<E<lt>metaE<gt>> tags. (default 1024, from spec)
See L<detectByPrescanStream|/detectByPrescanStream>.
=head4 encoding_use_bom
Allow the use of BOM detection to determine the input HTML encoding. (default 1)
See L<detectBomAndCut|/detectBomAndCut>.
=head4 utf8
Default: C<"auto">
If 1, then all returned strings have the utf8 flag set (chars).
If 0, then all returned strings do not have the utf8 flag set (bytes).
lib/HTML5/DOM.pod view on Meta::CPAN
=item *
L<detectBomAndCut|/detectBomAndCut>
=item *
L<detect|/detect>
=back
On success, returns an array with the encoding id and the new text without the BOM.
On failure, the encoding id is equal to HTML5::DOM::Encoding->NOT_DETERMINED.
my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectAuto("ололо");
my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
print $encoding; # UTF-8
=head3 detect
my $encoding_id = HTML5::DOM::Encoding::detect($text, $max_length = 0);
lib/HTML5/DOM.pod view on Meta::CPAN
my $encoding_id = HTML5::DOM::Encoding::detectByPrescanStream('
<meta http-equiv="content-type" content="text/html; charset=windows-1251">
');
my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
print $encoding; # WINDOWS-1251
=head3 detectBomAndCut
my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut($text, $max_length = 0);
Returns an array with the encoding id and the new text without the BOM.
On failure, the encoding id is equal to HTML5::DOM::Encoding->NOT_DETERMINED.
my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut("\xEF\xBB\xBFололо");
my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
print $encoding; # UTF-8
print $new_text; # ололо
=head1 NAMESPACES
lib/HTML5/DOM.pod view on Meta::CPAN
See L<detectByPrescanStream|/detectByPrescanStream>.
=head4 encoding_prescan_limit
Limit string length to determine encoding by C<E<lt>metaE<gt>> tags. (default 1024, from spec)
See L<detectByPrescanStream|/detectByPrescanStream>.
=head4 encoding_use_bom
Allow the use of BOM detection to determine the input HTML encoding. (default 1)
See L<detectBomAndCut|/detectBomAndCut>.
=head4 utf8
Default: C<"auto">
If 1, then all returned strings have the utf8 flag set (chars).
If 0, then all returned strings do not have the utf8 flag set (bytes).
third_party/modest/include/myhtml/api.h view on Meta::CPAN
* @param[in] text
* @param[in] text length
* @param[out] detected encoding
*
* @return true if encoding found, otherwise false
*/
bool
myencoding_detect_unicode(const char *text, size_t length, myencoding_t *encoding);
/**
* Detect Unicode character encoding by BOM
*
* Currently able to detect UTF-8, UTF-16LE, UTF-16BE
*
* @param[in] text
* @param[in] text length
* @param[out] detected encoding
*
* @return true if encoding found, otherwise false
*/
bool
myencoding_detect_bom(const char *text, size_t length, myencoding_t *encoding);
/**
* Detect Unicode character encoding by BOM. Cut the BOM if it is found
*
* Currently able to detect UTF-8, UTF-16LE, UTF-16BE
*
* @param[in] text
* @param[in] text length
* @param[out] detected encoding
* @param[out] new text position
* @param[out] new size position
*
* @return true if encoding found, otherwise false
third_party/modest/source/myhtml/api.h view on Meta::CPAN
* @param[in] text
* @param[in] text length
* @param[out] detected encoding
*
* @return true if encoding found, otherwise false
*/
bool
myencoding_detect_unicode(const char *text, size_t length, myencoding_t *encoding);
/**
* Detect Unicode character encoding by BOM
*
* Currently able to detect UTF-8, UTF-16LE, UTF-16BE
*
* @param[in] text
* @param[in] text length
* @param[out] detected encoding
*
* @return true if encoding found, otherwise false
*/
bool
myencoding_detect_bom(const char *text, size_t length, myencoding_t *encoding);
/**
* Detect Unicode character encoding by BOM. Cut the BOM if it is found
*
* Currently able to detect UTF-8, UTF-16LE, UTF-16BE
*
* @param[in] text
* @param[in] text length
* @param[out] detected encoding
* @param[out] new text position
* @param[out] new size position
*
* @return true if encoding found, otherwise false
if (opts->encoding == MyENCODING_AUTO) {
encoding = MyENCODING_NOT_DETERMINED;
if (*html_length) {
// Search encoding in meta-tags
if (opts->encoding_use_meta) {
size_t size = opts->encoding_prescan_limit < *html_length ? opts->encoding_prescan_limit : *html_length;
encoding = myencoding_prescan_stream_to_determine_encoding(*html_str, size);
}
if (encoding == MyENCODING_NOT_DETERMINED) {
// Check BOM
if (!opts->encoding_use_bom || !myencoding_detect_and_cut_bom(*html_str, *html_length, &encoding, html_str, html_length)) {
// Check heuristic
if (!myencoding_detect(*html_str, *html_length, &encoding)) {
// Can't determine encoding, use default
encoding = opts->default_encoding;
}
}
}
} else {
encoding = opts->default_encoding;
( run in 0.301 second using v1.01-cache-2.11-cpan-e9daa2b36ef )