view release on metacpan or search on metacpan
BhkENABLE|||x
BhkENTRY_set|||x
BhkENTRY|||xi
BhkFLAGS|||xi
BIN|5.003007|5.003007|
bind_match|5.003007||Viu
block_end|5.004000|5.004000|
block_gimme|5.004000|5.004000|u
blockhook_register|5.013003|5.013003|x
block_start|5.004000|5.004000|
BOM_UTF8|5.025005|5.003007|p
boolSV|5.004000|5.003007|p
boot_core_mro|5.009005||Viu
boot_core_PerlIO|5.007002||Viu
boot_core_UNIVERSAL|5.003007||Viu
_byte_dump_string|5.025006||cViu
BYTEORDER|5.003007|5.003007|
bytes_cmp_utf8|5.013007|5.013007|
bytes_from_utf8|5.007001|5.007001|x
bytes_from_utf8_loc|5.027001|5.027001|nxu
bytes_to_utf8|5.006001|5.006001|x
/* Fallback definition of isUTF8_CHAR(s, e), used only when the name is not
 * already defined.
 *
 * Evaluates to 0 when e <= s, or when is_utf8_string() rejects the
 * UTF8_SAFE_SKIP(s, e) bytes starting at s; otherwise evaluates to
 * UTF8SKIP(s).
 *
 * Both arguments are expanded more than once, so they must not have side
 * effects. */
#ifndef isUTF8_CHAR
# define isUTF8_CHAR(s, e) ( \
(e) <= (s) || ! is_utf8_string(s, UTF8_SAFE_SKIP(s, e)) \
? 0 \
: UTF8SKIP(s))
#endif
#endif
/* Provide BOM_UTF8 and REPLACEMENT_CHARACTER_UTF8 as string literals when they
 * are not already defined. The byte sequences depend on the compile-time
 * character set, which is identified from the numeric value of a character
 * constant. Compilation stops with an error for any other character set. */
#if 'A' == 65 /* 'A' has the value 65 in ASCII */
#ifndef BOM_UTF8
# define BOM_UTF8 "\xEF\xBB\xBF" /* UTF-8 encoding of U+FEFF */
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD" /* UTF-8 encoding of U+FFFD */
#endif
#elif '^' == 95 /* NOTE(review): presumably EBCDIC code page 1047 -- confirm */
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x73\x66\x73"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
#endif
#elif '^' == 176 /* NOTE(review): presumably EBCDIC code page 037 -- confirm */
#ifndef BOM_UTF8
# define BOM_UTF8 "\xDD\x72\x65\x72"
#endif
#ifndef REPLACEMENT_CHARACTER_UTF8
# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
#endif
#else
# error Unknown character set
#endif
tidy-html5.c view on Meta::CPAN
TidyDocImpl* doc;
};
/* Declarations of the StreamIn (input stream) entry points. */
TY_PRIVATE StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
TY_PRIVATE void TY_(freeStreamIn)(StreamIn* in);
TY_PRIVATE StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
TY_PRIVATE StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
/* Builds a StreamIn with initStreamIn(doc, encoding), copies *source into it
** and marks its I/O type as UserIO. */
TY_PRIVATE StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );
/* Inspects the first two bytes of the stream, and a third when those two are
** not a UTF-16 BOM. Returns UTF16BE, UTF16LE or UTF8 when the bytes form the
** corresponding BOM, after reporting ENCODING_MISMATCH if that is not
** compatible with the stream's current encoding. Returns -1 when the stream
** ends before enough bytes are available; bytes already read are pushed back.
** NOTE(review): presumably also returns -1 when no BOM is present -- confirm
** against the definition. */
TY_PRIVATE int TY_(ReadBOMEncoding)(StreamIn *in);
TY_PRIVATE unsigned int TY_(ReadChar)( StreamIn* in );
TY_PRIVATE void TY_(UngetChar)( unsigned int c, StreamIn* in );
TY_PRIVATE Bool TY_(IsEOF)( StreamIn* in );
/************************
** Sink
************************/
struct _StreamOut
tidy-html5.c view on Meta::CPAN
/* Declarations of the StreamOut (output sink) entry points and of the
** encoding-name lookup helpers. */
TY_PRIVATE StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, unsigned int newln );
TY_PRIVATE StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, unsigned int newln );
TY_PRIVATE StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, unsigned int newln );
TY_PRIVATE StreamOut* TY_(StdErrOutput)(void);
/* StreamOut* StdOutOutput(void); */
TY_PRIVATE void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );
TY_PRIVATE void TY_(WriteChar)( unsigned int c, StreamOut* out );
/* Writes UNICODE_BOM through WriteChar when the stream's encoding is UTF8,
** UTF16LE, UTF16BE or UTF16; does nothing for any other encoding. */
TY_PRIVATE void TY_(outBOM)( StreamOut *out );
TY_PRIVATE ctmbstr TY_(GetEncodingNameFromTidyId)(unsigned int id);
TY_PRIVATE ctmbstr TY_(GetEncodingOptNameFromTidyId)(unsigned int id);
TY_PRIVATE int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
/************************
** Misc
************************/
/* character encodings
tidy-html5.c view on Meta::CPAN
"Normalerweise schreibt Tidy die Ausgabe auf die Standard-Ausgabe <code>stdout</code>."
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
- Entities, tags, attributes, etc., should be enclosed in <code></code>.
- Option values should be enclosed in <var></var>.
- It's very important that <br/> be self-closing!
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
/* option-name: output-bom */
/* The BOM is the single code point U+FEFF. EF BB BF is only its UTF-8 byte
   encoding and must not be written as a "U+" code point. */
TidyOutputBOM, 0,
"Diese Option bestimmt, ob Tidy eine Byte-Reihenfolge-Markierung (BOM) an den Anfang der Ausgabe schreiben soll. "
"Dies betrifft nur UTF-8 und UTF-16 Ausgabekodierungen; das BOM-Zeichen hat den Wert U+FEFF "
"(in UTF-8 als Bytefolge EF BB BF kodiert). "
"<br/>"
"Wenn die Option den Wert <var>auto</var> hat, wird Tidy die BOM nur dann in die Ausgabe schreiben, "
"wenn sie bereits zu Beginn der Eingabedaten vorhanden war. "
"<br/>"
"XML/XHTML-Ausgaben in UTF-16 Kodierung erhalten immer eine BOM. "
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
- Entities, tags, attributes, etc., should be enclosed in <code></code>.
- Option values should be enclosed in <var></var>.
- It's very important that <br/> be self-closing!
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
/* option-name: indent-with-tabs */
tidy-html5.c view on Meta::CPAN
"markup is written to <code>stdout</code>. "
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
- Entities, tags, attributes, etc., should be enclosed in <code></code>.
- Option values should be enclosed in <var></var>.
- It's very important that <br/> be self-closing!
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
TidyOutputBOM, 0,
"This option specifies if Tidy should write a Unicode Byte Order Mark "
"character (BOM; also known as Zero Width No-Break Space; has value of "
"U+FEFF) to the beginning of the output, and only applies to UTF-8 and "
"UTF-16 output encodings. "
"<br/>"
"If set to <var>auto</var> this option causes Tidy to write a BOM to "
"the output only if a BOM was present at the beginning of the input. "
"<br/>"
"A BOM is always written for XML/XHTML output using UTF-16 output "
"encodings. "
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
<br/>.
- Entities, tags, attributes, etc., should be enclosed in <code></code>.
- Option values should be enclosed in <var></var>.
- It's very important that <br/> be self-closing!
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
tidy-html5.c view on Meta::CPAN
{ TidyOutCharEncoding, 0,
"Cette option précise l'encodage de caractère utilisé par Tidy pour la sortie. <br/>Notez que cela "
"peut être différent de <code>input-encoding</code> seulement pour les encodages latins (<var>ascii</"
"var>, <var>latin0</var>, <var>latin1</var>, <var>mac</var>, <var>win1252</var>, <var>ibm858</var>)."
"<br/>Voir <code>char-encoding</code> pour plus d'informations."
},
{ TidyOutFile, 0,
"Cette option précise le fichier de sortie que Tidy utilise pour la structure de balises. En "
"principe la structure est écrite vers <code>stdout</code>. "
},
{ TidyOutputBOM, 0,
"Cette option précise si Tidy doit écrire un caractère indicateur d'ordre des octets (Unicode Byte "
"Order Mark ou BOM; connu aussi sous Zero Width No-Break Space; a la valeur de U+FEFF) au début de "
"la sortie, ce qui ne s'applique qu'aux sorties encodées en UTF-8 et UTF-16. <br/>Si l'option vaut "
"<var>auto</var>, Tidy écrira un BOM vers la sortie seulement si un BOM était présent au début de "
"l'entrée. <br/>Un BOM est toujours écrit pour la sortie XML/XHTML des sorties encodées en UTF-16. "
},
{ TidyPPrintTabs, 0,
"Cette option précise si tidy doit indenter avec des tabulations plutôt que des espaces, en "
"supposant que <code>indent</code> vaut <var>yes</var>. <br/>Définir cette option à <var>yes</var> "
"indente avec des tabulations plutôt que des espaces, valeur par défaut. <br />Utilisez l'option "
"<code>indent-spaces</code> pour redéfinir le nombre de tabulations en sortie par niveau "
"d'indentation. Notez que lorsque <code>indent-with-tabs</code> est activé, la valeur par défaut de "
"<code>indent-spaces</code> est réinitialisée à <var>1</var>. <br/>Notez que <code>tab-size</code> "
"contrôle la conversion des tabulations d'entrée en des espaces de sortie. Définissez-la à zéro pour "
"conserver les tabulations en entrée."
tidy-html5.c view on Meta::CPAN
/* Description of the output-encoding option (text repaired: "saída" was mis-encoded). */
{ TidyOutCharEncoding, 0,
"Esta opção especifica a codificação de caracteres que o Tidy usa para a saída. <br/>Note que "
"isso só pode ser diferente de <code>input-encoding</code> para codificações latinas "
"(<var>ascii</var>, <var>latin0</var>, <var>latin1</var>, <var>mac</var>, <var>win1252</var>, "
"<var>ibm858</var>). <br/>Veja <code>char-encoding</code> para mais informações"
},
/* Description of the output-file option (text repaired: "saída" was mis-encoded). */
{ TidyOutFile, 0,
"Essa opção especifica o arquivo de saída que o Tidy usa para marcação. "
"Normalmente, a marcação é escrita para <code>stdout</code>. "
},
/* Description of the output-bom option. Text repaired: mis-encoded "í"
   restored, and the character name corrected to "Zero Width No-Break Space". */
{ TidyOutputBOM, 0,
"Esta opção especifica se o Tidy deve escrever um caractere Unicode de marca "
"de ordem de byte (BOM, também conhecido como Zero Width No-Break Space, tem "
"valor de U+FEFF) no início da saída, e aplica-se apenas a codificações de "
"saída UTF-8 e UTF-16. <br/>Se configurado para <var>auto</var>, esta opção "
"faz com que Tidy escreva um BOM para a saída somente se um BOM estiver "
"presente no início da entrada. <br/>Um BOM está sempre escrita para saída "
"XML/XHTML usando codificações de saída UTF-16. "
},
{ TidyPPrintTabs, 0,
"Esta opção especifica se o Tidy deve aplicar recuo com tabulações em vez de "
"espaços, presumindo que <code>indent</code> seja <var>yes</var>. <br/>Defina-"
"o para <var>yes</var> para recuar usando tabulações em vez dos espaços "
"padrão. <br/>Use a opção <code>indent-spaces</code> para controlar o número "
"de saÃdas de tabulação por nÃvel de recuo. Note que quando <code>indent-with-"
"tabs</code> está habilitado, o valor padrão de <code>indent-spaces</code> é "
"reiniciado para <var>1</var>. <br/>Note que <code>tab-size</code> controla a "
tidy-html5.c view on Meta::CPAN
Bool HTML5Mode; /* current mode is html5 */
Bool xmlDetected; /* true if XML was used/detected */
/* Memory allocator */
TidyAllocator* allocator;
/* Miscellaneous */
void* appData;
unsigned int nClassId;
Bool inputHadBOM;
#if PRESERVE_FILE_TIMES
struct utimbuf filetimes;
#endif
tmbstr givenDoctype;
};
/** The basic struct for communicating a message within LibTidy. All of the
** relevant information pertaining to a message can be retrieved with the
** accessor functions and one of these records.
tidy-html5.c view on Meta::CPAN
TY_PRIVATE int TY_(DecodeUTF8BytesToChar)( unsigned int* c, unsigned int firstByte, ctmbstr successorBytes,
TidyInputSource* inp, int* count );
TY_PRIVATE int TY_(EncodeCharToUTF8Bytes)( unsigned int c, tmbstr encodebuf,
TidyOutputSink* outp, int* count );
TY_PRIVATE unsigned int TY_(GetUTF8)( ctmbstr str, unsigned int *ch );
TY_PRIVATE tmbstr TY_(PutUTF8)( tmbstr buf, unsigned int c );
/* Byte Order Mark constants.
**
** ReadBOMEncoding combines the first two input bytes as (b0 << 8) + b1 and
** compares the result with UNICODE_BOM_BE and UNICODE_BOM_LE; it combines the
** first three bytes as (b0 << 16) + (b1 << 8) + b2 and compares the result
** with UNICODE_BOM_UTF8. UNICODE_BOM_UTF8 is therefore the packed byte
** sequence EF BB BF, not a Unicode code point.
**
** UNICODE_BOM is the value outBOM passes to WriteChar.
*/
#define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */
#define UNICODE_BOM UNICODE_BOM_BE
#define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */
#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */
TY_PRIVATE Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
TY_PRIVATE Bool TY_(IsHighSurrogate)( tchar ch );
TY_PRIVATE Bool TY_(IsLowSurrogate)( tchar ch );
TY_PRIVATE Bool TY_(IsCombinedChar)( tchar ch );
TY_PRIVATE Bool TY_(IsValidCombinedChar)( tchar ch );
TY_PRIVATE tchar TY_(CombineSurrogatePair)( tchar high, tchar low );
tidy-html5.c view on Meta::CPAN
{ TidyMergeSpans, MC, "merge-spans", IN, TidyAutoState, ParsePickList, &autoBoolPicks },
{ TidyMetaCharset, DT, "add-meta-charset", BL, no, ParsePickList, &boolPicks }, /* 20161004 - Issue #456 */
{ TidyMuteReports, DD, "mute", ST, 0, ParseList, NULL },
{ TidyMuteShow, DD, "mute-id", BL, no, ParsePickList, &boolPicks },
{ TidyNCR, ME, "ncr", BL, yes, ParsePickList, &boolPicks },
{ TidyNewline, CE, "newline", IN, DLF, ParsePickList, &newlinePicks },
{ TidyNumEntities, ME, "numeric-entities", BL, no, ParsePickList, &boolPicks },
{ TidyOmitOptionalTags, PP, "omit-optional-tags", BL, no, ParsePickList, &boolPicks },
{ TidyOutCharEncoding, CE, "output-encoding", IN, UTF8, ParseCharEnc, &charEncPicks },
{ TidyOutFile, IO, "output-file", ST, 0, ParseString, NULL },
{ TidyOutputBOM, CE, "output-bom", IN, TidyAutoState, ParsePickList, &autoBoolPicks },
{ TidyPPrintTabs, PP, "indent-with-tabs", BL, no, ParseTabs, &boolPicks }, /* 20150515 - Issue #108 */
{ TidyPreserveEntities, ME, "preserve-entities", BL, no, ParsePickList, &boolPicks },
{ TidyPreTags, MT, "new-pre-tags", ST, 0, ParseList, NULL },
{ TidyPriorityAttributes, PP, "priority-attributes", ST, 0, ParseList, NULL },
{ TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParsePickList, &boolPicks },
{ TidyQuiet, DD, "quiet", BL, no, ParsePickList, &boolPicks },
{ TidyQuoteAmpersand, ME, "quote-ampersand", BL, yes, ParsePickList, &boolPicks },
{ TidyQuoteMarks, ME, "quote-marks", BL, no, ParsePickList, &boolPicks },
{ TidyQuoteNbsp, ME, "quote-nbsp", BL, yes, ParsePickList, &boolPicks },
{ TidyReplaceColor, MX, "replace-color", BL, no, ParsePickList, &boolPicks },
tidy-html5.c view on Meta::CPAN
cfg(doc, TidyOutCharEncoding) != UTF16LE &&
cfg(doc, TidyOutCharEncoding) != RAW &&
cfgBool(doc, TidyXmlOut) )
{
TY_(SetOptionBool)( doc, TidyXmlDecl, yes );
}
/* XML requires end tags */
if ( cfgBool(doc, TidyXmlOut) )
{
/* XML requires a BOM on output if using UTF-16 encoding */
unsigned int enc = cfg( doc, TidyOutCharEncoding );
if ( enc == UTF16LE || enc == UTF16BE || enc == UTF16 )
TY_(SetOptionInt)( doc, TidyOutputBOM, yes );
TY_(SetOptionBool)( doc, TidyQuoteAmpersand, yes );
TY_(SetOptionBool)( doc, TidyOmitOptionalTags, no );
}
}
/* A service to ParseList(), keeps option values nicely formatted and
coordinates additions to the internal lists. Within Tidy, this function
might be used to programmatically add individual values to items that use
tidy-html5.c view on Meta::CPAN
}
/* Create an input stream that reads through a caller-supplied input source.
**
** doc      - document passed on to initStreamIn
** source   - input source descriptor; its contents are copied into the new
**            stream, so the stream does not keep the pointer itself
** encoding - encoding passed on to initStreamIn
**
** Returns the stream obtained from initStreamIn, marked as UserIO.
*/
StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding )
{
    StreamIn* stream = TY_(initStreamIn)( doc, encoding );

    stream->iotype = UserIO;
    /* Take a private copy of the descriptor. */
    memcpy( &stream->source, source, sizeof(*source) );

    return stream;
}
int TY_(ReadBOMEncoding)(StreamIn *in)
{
unsigned int c, c1;
unsigned int bom;
c = ReadByte(in);
if (c == EndOfStream)
return -1;
c1 = ReadByte( in );
if (c1 == EndOfStream)
{
UngetByte(in, c);
return -1;
}
/* todo: dont warn about mismatch for auto input encoding */
/* todo: let the user override the encoding found here */
bom = (c << 8) + c1;
if ( bom == UNICODE_BOM_BE )
{
/* big-endian UTF-16 */
if ( in->encoding != UTF16 && in->encoding != UTF16BE )
TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF16BE);
return UTF16BE; /* return decoded BOM */
}
else if (bom == UNICODE_BOM_LE)
{
/* little-endian UTF-16 */
if (in->encoding != UTF16 && in->encoding != UTF16LE)
TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF16LE);
return UTF16LE; /* return decoded BOM */
}
else
{
unsigned int c2 = ReadByte(in);
if (c2 == EndOfStream)
{
UngetByte(in, c1);
UngetByte(in, c);
return -1;
}
if (((c << 16) + (c1 << 8) + c2) == UNICODE_BOM_UTF8)
{
/* UTF-8 */
if (in->encoding != UTF8)
TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF8);
return UTF8;
}
else
UngetByte( in, c2 );
}
tidy-html5.c view on Meta::CPAN
return n;
}
}
else
n = c;
return n;
}
/* Write a Byte Order Mark to `out` when its encoding calls for one.
**
** Only the UTF8, UTF16LE, UTF16BE and UTF16 encodings get a BOM; for every
** other encoding the function returns without writing anything.
*/
void TY_(outBOM)( StreamOut *out )
{
    if ( out->encoding != UTF8
         && out->encoding != UTF16LE
         && out->encoding != UTF16BE
         && out->encoding != UTF16 )
    {
        return;
    }

    /* WriteChar takes care of encoding the BOM correctly for the stream. */
    TY_(WriteChar)( UNICODE_BOM, out );
}
/* this is an intermediate fix for various problems; in the */
/* long term the code and data in charsets.c should be used */
static struct _enc2iana
{
unsigned int id;
ctmbstr name;
ctmbstr tidyOptName;
tidy-html5.c view on Meta::CPAN
* Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed
* to determine which hash is to be used, so free it last.
\*/
TY_(FreeLexer)( doc );
doc->givenDoctype = NULL;
doc->lexer = TY_(NewLexer)( doc );
/* doc->lexer->root = &doc->root; */
doc->root.line = doc->lexer->lines;
doc->root.column = doc->lexer->columns;
doc->inputHadBOM = no;
doc->xmlDetected = no;
bomEnc = TY_(ReadBOMEncoding)(in);
if (bomEnc != -1)
{
in->encoding = bomEnc;
TY_(SetOptionInt)(doc, TidyInCharEncoding, bomEnc);
}
/* Tidy doesn't alter the doctype for generic XML docs */
if ( xmlIn )
{
tidy-html5.c view on Meta::CPAN
return yes;
}
return no;
}
int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
{
Bool showMarkup = cfgBool( doc, TidyShowMarkup );
Bool forceOutput = cfgBool( doc, TidyForceOutput );
Bool outputBOM = ( cfgAutoBool(doc, TidyOutputBOM) == TidyYesState );
Bool smartBOM = ( cfgAutoBool(doc, TidyOutputBOM) == TidyAutoState );
Bool xmlOut = cfgBool( doc, TidyXmlOut );
Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
TidyTriState bodyOnly = cfgAutoBool( doc, TidyBodyOnly );
Bool dropComments = cfgBool(doc, TidyHideComments);
Bool makeClean = cfgBool(doc, TidyMakeClean);
Bool asciiChars = cfgBool(doc, TidyAsciiChars);
Bool makeBare = cfgBool(doc, TidyMakeBare);
Bool escapeCDATA = cfgBool(doc, TidyEscapeCdata);
Bool ppWithTabs = cfgBool(doc, TidyPPrintTabs);
tidy-html5.c view on Meta::CPAN
/* attribute values / non-text tokens */
TY_(NormalizeSpaces)(doc->lexer, &doc->root);
else
TY_(ReplacePreformattedSpaces)(doc, &doc->root);
TY_(SortAttributes)(doc, &doc->root, sortAttrStrat);
if ( showMarkup && (doc->errors == 0 || forceOutput) )
{
/* Output a Byte Order Mark if required */
if ( outputBOM || (doc->inputHadBOM && smartBOM) )
TY_(outBOM)( out );
/* No longer necessary. No DOCTYPE == HTML 3.2,
** which gives you only the basic character entities,
** which are safe in any browser.
** if ( !TY_(FindDocType)(doc) )
** TY_(SetOptionBool)( doc, TidyNumEntities, yes );
*/
doc->docOut = out;
if ( xmlOut && !xhtmlOut )
tidy-html5.h view on Meta::CPAN
TidyMergeSpans, /**< Merge multiple SPANs */
TidyMetaCharset, /**< Adds/checks/fixes meta charset in the head, based on document type */
TidyMuteReports, /**< Filter these messages from output. */
TidyMuteShow, /**< Show message ID's in the error table */
TidyNCR, /**< Allow numeric character references */
TidyNewline, /**< Output line ending (default to platform) */
TidyNumEntities, /**< Use numeric entities */
TidyOmitOptionalTags, /**< Suppress optional start tags and end tags */
TidyOutCharEncoding, /**< Output character encoding (if different) */
TidyOutFile, /**< File name to write markup to */
TidyOutputBOM, /**< Output a Byte Order Mark (BOM) for UTF-8 and UTF-16 encodings */
TidyPPrintTabs, /**< Indent using tabs instead of spaces */
TidyPreserveEntities, /**< Preserve entities */
TidyPreTags, /**< Declared pre tags */
TidyPriorityAttributes, /**< Attributes to place first in an element */
TidyPunctWrap, /**< consider punctuation and breaking spaces for wrapping */
TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */
TidyQuoteAmpersand, /**< Output naked ampersand as &amp; */
TidyQuoteMarks, /**< Output " marks as &quot; */
TidyQuoteNbsp, /**< Output non-breaking space as entity */
TidyReplaceColor, /**< Replace hex color attribute values with names */
tidy-html5.h view on Meta::CPAN
/** @}
** @name Configuration Options Pick List and Parser Enumerations
**
** These enums define enumerated states for the configuration options that
** take values that are not simple yes/no, strings, or simple integers.
**
** @{ */
/** Tri-state (no / yes / auto) values used by ParseBool, ParseTriState,
 ** ParseIndent, ParseBOM
 ** @remark This enum's starting value is guaranteed to remain stable.
 */
typedef enum
{
    TidyNoState   = 0, /**< maps to 'no' */
    TidyYesState  = 1, /**< maps to 'yes' */
    TidyAutoState = 2  /**< Automatic */
} TidyTriState;