HTML5-DOM

 view release on metacpan or  search on metacpan

README.pod  view on Meta::CPAN

=item *

L<detectBomAndCut|/detectBomAndCut>

=item *

L<detect|/detect>

=back

On success, returns an array with the encoding id and the new text without the BOM.

On failure, the encoding id equals HTML5::DOM::Encoding->NOT_DETERMINED.

 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectAuto("ололо");
 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
 print $encoding; # UTF-8

=head3 detect

 my $encoding_id = HTML5::DOM::Encoding::detect($text, $max_length = 0);

README.pod  view on Meta::CPAN

 my $encoding_id = HTML5::DOM::Encoding::detectByPrescanStream('
    <meta http-equiv="content-type" content="text/html; charset=windows-1251">
 ');
 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
 print $encoding; # WINDOWS-1251

=head3 detectBomAndCut

 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut($text, $max_length = 0);

Returns an array with the encoding id and the new text without the BOM.

On failure, the encoding id equals HTML5::DOM::Encoding->NOT_DETERMINED.

 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut("\xEF\xBB\xBFололо");
 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
 print $encoding; # UTF-8
 print $new_text; # ололо

=head1 NAMESPACES

README.pod  view on Meta::CPAN

See L<detectByPrescanStream|/detectByPrescanStream>.

=head4 encoding_prescan_limit

Limit string length to determine encoding by C<E<lt>metaE<gt>> tags. (default 1024, from spec)

See L<detectByPrescanStream|/detectByPrescanStream>.

=head4 encoding_use_bom

Allow detecting the BOM to determine the input HTML encoding. (default 1)

See L<detectBomAndCut|/detectBomAndCut>.

=head4 utf8

Default: C<"auto">

If 1, then all returned strings have utf8 flag (chars).

If 0, then all returned strings do not have the utf8 flag (bytes).

lib/HTML5/DOM.pod  view on Meta::CPAN

=item *

L<detectBomAndCut|/detectBomAndCut>

=item *

L<detect|/detect>

=back

On success, returns an array with the encoding id and the new text without the BOM.

On failure, the encoding id equals HTML5::DOM::Encoding->NOT_DETERMINED.

 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectAuto("ололо");
 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
 print $encoding; # UTF-8

=head3 detect

 my $encoding_id = HTML5::DOM::Encoding::detect($text, $max_length = 0);

lib/HTML5/DOM.pod  view on Meta::CPAN

 my $encoding_id = HTML5::DOM::Encoding::detectByPrescanStream('
    <meta http-equiv="content-type" content="text/html; charset=windows-1251">
 ');
 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
 print $encoding; # WINDOWS-1251

=head3 detectBomAndCut

 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut($text, $max_length = 0);

Returns an array with the encoding id and the new text without the BOM.

On failure, the encoding id equals HTML5::DOM::Encoding->NOT_DETERMINED.

 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut("\xEF\xBB\xBFололо");
 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
 print $encoding; # UTF-8
 print $new_text; # ололо

=head1 NAMESPACES

lib/HTML5/DOM.pod  view on Meta::CPAN

See L<detectByPrescanStream|/detectByPrescanStream>.

=head4 encoding_prescan_limit

Limit string length to determine encoding by C<E<lt>metaE<gt>> tags. (default 1024, from spec)

See L<detectByPrescanStream|/detectByPrescanStream>.

=head4 encoding_use_bom

Allow detecting the BOM to determine the input HTML encoding. (default 1)

See L<detectBomAndCut|/detectBomAndCut>.

=head4 utf8

Default: C<"auto">

If 1, then all returned strings have utf8 flag (chars).

If 0, then all returned strings do not have the utf8 flag (bytes).

third_party/modest/include/myhtml/api.h  view on Meta::CPAN

 * @param[in]  text
 * @param[in]  text length
 * @param[out] detected encoding
 *
 * @return true if encoding found, otherwise false
 */
bool
myencoding_detect_unicode(const char *text, size_t length, myencoding_t *encoding);

/**
 * Detect Unicode character encoding by BOM
 *
 * Currently able to detect UTF-8, UTF-16LE and UTF-16BE
 *
 * @param[in]  text      text to inspect
 * @param[in]  length    text length
 * @param[out] encoding  detected encoding
 *
 * @return true if encoding found, otherwise false
 */
bool
myencoding_detect_bom(const char *text, size_t length, myencoding_t *encoding);

/**
 * Detect Unicode character encoding by BOM. Cut BOM if will be found
 *
 * Now available for detect UTF-8, UTF-16LE, UTF-16BE
 *
 * @param[in]  text
 * @param[in]  text length
 * @param[out] detected encoding
 * @param[out] new text position
 * @param[out] new size position
 *
 * @return true if encoding found, otherwise false

third_party/modest/source/myhtml/api.h  view on Meta::CPAN

 * @param[in]  text
 * @param[in]  text length
 * @param[out] detected encoding
 *
 * @return true if encoding found, otherwise false
 */
bool
myencoding_detect_unicode(const char *text, size_t length, myencoding_t *encoding);

/**
 * Detect Unicode character encoding by BOM
 *
 * Currently able to detect UTF-8, UTF-16LE and UTF-16BE
 *
 * @param[in]  text      text to inspect
 * @param[in]  length    text length
 * @param[out] encoding  detected encoding
 *
 * @return true if encoding found, otherwise false
 */
bool
myencoding_detect_bom(const char *text, size_t length, myencoding_t *encoding);

/**
 * Detect Unicode character encoding by BOM. Cut BOM if will be found
 *
 * Now available for detect UTF-8, UTF-16LE, UTF-16BE
 *
 * @param[in]  text
 * @param[in]  text length
 * @param[out] detected encoding
 * @param[out] new text position
 * @param[out] new size position
 *
 * @return true if encoding found, otherwise false

utils.c  view on Meta::CPAN

	if (opts->encoding == MyENCODING_AUTO) {
		encoding = MyENCODING_NOT_DETERMINED;
		if (*html_length) {
			// Search encoding in meta-tags
			if (opts->encoding_use_meta) {
				size_t size = opts->encoding_prescan_limit < *html_length ? opts->encoding_prescan_limit : *html_length;
				encoding = myencoding_prescan_stream_to_determine_encoding(*html_str, size);
			}
			
			if (encoding == MyENCODING_NOT_DETERMINED) {
				// Check BOM
				if (!opts->encoding_use_bom || !myencoding_detect_and_cut_bom(*html_str, *html_length, &encoding, html_str, html_length)) {
					// Check heuristic
					if (!myencoding_detect(*html_str, *html_length, &encoding)) {
						// Can't determine encoding, use default
						encoding = opts->default_encoding;
					}
				}
			}
		} else {
			encoding = opts->default_encoding;



( run in 0.301 second using v1.01-cache-2.11-cpan-e9daa2b36ef )