HTML5-DOM

 view release on metacpan or  search on metacpan

third_party/modest/include/myhtml/api.h  view on Meta::CPAN


/**
 * Convert a Unicode code point to UTF-16LE
 *
 * I advise not to use UTF-16! Use UTF-8 and be happy!
 *
 * @param[in]  codepoint Unicode code point to convert
 * @param[out] data      buffer that receives the encoded character.
 *   The encoded length is 2 or 4 bytes, but the buffer must always
 *   have at least 4 bytes available
 *
 * @return size of the character set in data
 */
size_t
myencoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data);

/**
 * Detect character encoding
 *
 * Detection is currently available for UTF-8, UTF-16LE, UTF-16BE
 * and the Russian encodings: windows-1251, koi8-r, iso-8859-5, x-mac-cyrillic, ibm866.
 * Other encodings are in progress
 *
 * @param[in]  text     text to examine
 * @param[in]  length   length of text
 * @param[out] encoding detected encoding
 *
 * @return true if an encoding was found, otherwise false
 */
bool
myencoding_detect(const char *text, size_t length, myencoding_t *encoding);

/**
 * Detect Russian character encoding
 *
 * Detection is currently available for windows-1251, koi8-r, iso-8859-5,
 * x-mac-cyrillic, ibm866
 *
 * @param[in]  text     text to examine
 * @param[in]  length   length of text
 * @param[out] encoding detected encoding
 *
 * @return true if an encoding was found, otherwise false
 */
bool
myencoding_detect_russian(const char *text, size_t length, myencoding_t *encoding);

/**
 * Detect Unicode character encoding
 *
 * Detection is currently available for UTF-8, UTF-16LE, UTF-16BE
 *
 * @param[in]  text     text to examine
 * @param[in]  length   length of text
 * @param[out] encoding detected encoding
 *
 * @return true if an encoding was found, otherwise false
 */
bool
myencoding_detect_unicode(const char *text, size_t length, myencoding_t *encoding);

/**
 * Detect Unicode character encoding by BOM (byte order mark)
 *
 * Detection is currently available for UTF-8, UTF-16LE, UTF-16BE
 *
 * @param[in]  text     text to examine
 * @param[in]  length   length of text
 * @param[out] encoding detected encoding
 *
 * @return true if an encoding was found, otherwise false
 */
bool
myencoding_detect_bom(const char *text, size_t length, myencoding_t *encoding);

/**
 * Detect Unicode character encoding by BOM (byte order mark).
 * The BOM is cut off if it is found
 *
 * Detection is currently available for UTF-8, UTF-16LE, UTF-16BE
 *
 * @param[in]  text     text to examine
 * @param[in]  length   length of text
 * @param[out] encoding detected encoding
 * @param[out] new_text new text position
 * @param[out] new_size new text size
 *
 * @return true if an encoding was found, otherwise false
 */
bool
myencoding_detect_and_cut_bom(const char *text, size_t length, myencoding_t *encoding,
                                   const char **new_text, size_t *new_size);

/**
 * Detect encoding by name
 * For example, the name windows-1258 gives MyENCODING_WINDOWS_1258;
 *              cp1251 or windows-1251 gives MyENCODING_WINDOWS_1251
 *
 * See https://encoding.spec.whatwg.org/#names-and-labels
 *
 * @param[in]  name     encoding name
 * @param[in]  length   length of name
 * @param[out] encoding detected encoding
 *
 * @return true if an encoding was found, otherwise false
 */
bool
myencoding_by_name(const char *name, size_t length, myencoding_t *encoding);

/**
 * Get the encoding name for a myencoding_t value (by id)
 *
 * @param[in]  encoding encoding id (myencoding_t)
 * @param[out] length   length of the returned name
 *
 * @return encoding name, otherwise NULL
 */
const char*
myencoding_name_by_id(myencoding_t encoding, size_t *length);

/**
 * Detect the encoding declared in a meta tag (<meta ...>) before parsing starts
 *
 * See https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
 *
 * @param[in]  data      html data bytes
 * @param[in]  data_size html data length
 *
 * @return the detected encoding if one was found, otherwise MyENCODING_NOT_DETERMINED
 */
myencoding_t
myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size);

/**
 * Extracting character encoding from string. Find "charset=" and see encoding. 
 * For example: "text/html; charset=windows-1251". Return MyENCODING_WINDOWS_1251
 *
 *



( run in 1.379 second using v1.01-cache-2.11-cpan-524268b4103 )