LibYAML-FFI
view release on metacpan or search on metacpan
ffi/reader.c view on Meta::CPAN
(octet & 0xF0) == 0xE0 ? octet & 0x0F :
(octet & 0xF8) == 0xF0 ? octet & 0x07 : 0;
/* Check and decode the trailing octets. */
for (k = 1; k < width; k ++)
{
octet = parser->raw_buffer.pointer[k];
/* Check if the octet is valid. */
if ((octet & 0xC0) != 0x80)
return yaml_parser_set_reader_error(parser,
"invalid trailing UTF-8 octet",
parser->offset+k, octet);
/* Decode the octet. */
value = (value << 6) + (octet & 0x3F);
}
/* Check the length of the sequence against the value. */
if (!((width == 1) ||
(width == 2 && value >= 0x80) ||
(width == 3 && value >= 0x800) ||
(width == 4 && value >= 0x10000)))
return yaml_parser_set_reader_error(parser,
"invalid length of a UTF-8 sequence",
parser->offset, -1);
/* Check the range of the value. */
if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF)
return yaml_parser_set_reader_error(parser,
"invalid Unicode character",
parser->offset, value);
break;
case YAML_UTF16LE_ENCODING:
case YAML_UTF16BE_ENCODING:
low = (parser->encoding == YAML_UTF16LE_ENCODING ? 0 : 1);
high = (parser->encoding == YAML_UTF16LE_ENCODING ? 1 : 0);
/*
* The UTF-16 encoding is not as simple as one might
* naively think. Check RFC 2781
* (http://www.ietf.org/rfc/rfc2781.txt).
*
* Normally, two subsequent bytes describe a Unicode
* character. However a special technique (called a
* surrogate pair) is used for specifying character
* values larger than 0xFFFF.
*
* A surrogate pair consists of two pseudo-characters:
* high surrogate area (0xD800-0xDBFF)
* low surrogate area (0xDC00-0xDFFF)
*
* The following formulas are used for decoding
* and encoding characters using surrogate pairs:
*
* U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF)
* U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF)
* W1 = 110110yyyyyyyyyy
* W2 = 110111xxxxxxxxxx
*
* where U is the character value, W1 is the high surrogate
* area, W2 is the low surrogate area.
*/
/* Check for incomplete UTF-16 character. */
if (raw_unread < 2) {
if (parser->eof) {
return yaml_parser_set_reader_error(parser,
"incomplete UTF-16 character",
parser->offset, -1);
}
incomplete = 1;
break;
}
/* Get the character. */
value = parser->raw_buffer.pointer[low]
+ (parser->raw_buffer.pointer[high] << 8);
/* Check for unexpected low surrogate area. */
if ((value & 0xFC00) == 0xDC00)
return yaml_parser_set_reader_error(parser,
"unexpected low surrogate area",
parser->offset, value);
/* Check for a high surrogate area. */
if ((value & 0xFC00) == 0xD800) {
width = 4;
/* Check for incomplete surrogate pair. */
if (raw_unread < 4) {
if (parser->eof) {
return yaml_parser_set_reader_error(parser,
"incomplete UTF-16 surrogate pair",
parser->offset, -1);
}
incomplete = 1;
break;
}
/* Get the next character. */
value2 = parser->raw_buffer.pointer[low+2]
+ (parser->raw_buffer.pointer[high+2] << 8);
/* Check for a low surrogate area. */
( run in 0.437 second using v1.01-cache-2.11-cpan-df04353d9ac )