HTML-Valid

 view release on metacpan or  search on metacpan

tidy-html5.c  view on Meta::CPAN

 A document in ISO-2022 based encoding uses some ESC sequences called
 "designator" to switch character sets. The designators defined and
 used in ISO-2022-JP are:

    "ESC" + "(" + ?     for ISO646 variants

    "ESC" + "$" + ?     and
    "ESC" + "$" + "(" + ?   for multibyte character sets
*/
/* States of the finite-state machine used for ISO 2022 input/output
** (stored in the `state` member of the stream structs).
** The enumerators keep their implicit values 0..5.
** NOTE(review): judging by the names, the intermediate states track how much
** of an ESC designator sequence has been seen so far -- confirm against the
** reader/writer code.
*/
typedef enum
{
  FSM_ASCII    = 0,
  FSM_ESC      = 1,
  FSM_ESCD     = 2,
  FSM_ESCDP    = 3,
  FSM_ESCP     = 4,
  FSM_NONASCII = 5
} ISO2022State;

/************************
** Source
************************/

/* Fixed sizes used by the input stream.
** LASTPOS_SIZE is the length of the lastcols[] array in struct _StreamIn.
** NOTE(review): CHARBUF_SIZE is presumably the initial capacity of the
** stream's charbuf -- confirm where the buffer is allocated.
*/
enum
{
    LASTPOS_SIZE = 64,
    CHARBUF_SIZE = 5
};

/* Input stream state: ISO 2022 decoding state, a character buffer,
** line/column bookkeeping, the encoding id and the underlying input source.
** Original note: non-raw input is cleaned up.
** Member order is part of the layout; do not reorder.
*/
struct _StreamIn
{
    ISO2022State    state;     /* FSM for ISO2022 */
    Bool   pushed;             /* NOTE(review): presumably true while pushed-back characters are pending -- confirm */
    TidyAllocator *allocator;  /* allocator associated with this stream */
    tchar* charbuf;            /* character buffer; NOTE(review): presumably holds pushed-back characters -- confirm */
    unsigned int   bufpos;     /* NOTE(review): presumably the current position within charbuf -- confirm */
    unsigned int   bufsize;    /* NOTE(review): presumably the allocated capacity of charbuf -- confirm */
    int    tabs;               /* NOTE(review): presumably spaces still owed for a tab being expanded -- confirm */
    int    lastcols[LASTPOS_SIZE]; /* saved column values, LASTPOS_SIZE entries */
    unsigned short curlastpos; /* current last position in lastcols */ 
    unsigned short firstlastpos; /* first valid last position in lastcols */ 
    int    curcol;             /* current column */
    int    curline;            /* current line */
    int    encoding;           /* character encoding id; NOTE(review): presumably one of the RAW..SHIFTJIS defines -- confirm */
    IOType iotype;             /* kind of I/O backing this stream */

    TidyInputSource source;    /* the underlying input source */

    /* Pointer back to document for error reporting */
    TidyDocImpl* doc;
};

/* Create / release a StreamIn for the given document and encoding id.
** NOTE(review): ownership rules (who may call freeStreamIn, and whether the
** underlying source is closed) are not visible here -- confirm at the
** definitions.
*/
TY_PRIVATE StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
TY_PRIVATE void TY_(freeStreamIn)(StreamIn* in);

/* Obtain a StreamIn for a document from a FILE*, a TidyBuffer, or a
** caller-supplied TidyInputSource, using the given encoding id.
*/
TY_PRIVATE StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
TY_PRIVATE StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
TY_PRIVATE StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );

/* Character-level access to a StreamIn.
** NOTE(review): ReadBOMEncoding presumably returns the encoding id implied by
** a leading byte order mark (and a negative value when there is none), and
** UngetChar presumably pushes c back so the next ReadChar returns it --
** confirm at the definitions.
*/
TY_PRIVATE int       TY_(ReadBOMEncoding)(StreamIn *in);
TY_PRIVATE unsigned int      TY_(ReadChar)( StreamIn* in );
TY_PRIVATE void      TY_(UngetChar)( unsigned int c, StreamIn* in );
TY_PRIVATE Bool      TY_(IsEOF)( StreamIn* in );


/************************
** Sink
************************/

/* Output stream state: encoding id, ISO 2022 state, newline setting,
** kind of I/O and the sink that receives the output.
** Member order is part of the layout; do not reorder.
*/
struct _StreamOut
{
    int   encoding;           /* character encoding id; NOTE(review): presumably one of the RAW..SHIFTJIS defines -- confirm */
    ISO2022State   state;     /* for ISO 2022 */
    unsigned int  nl;         /* NOTE(review): presumably the newline style given as `newln` when the stream is created -- confirm */
    IOType iotype;            /* kind of I/O backing this stream */
    TidyOutputSink sink;      /* the underlying output sink */
};

/* Obtain a StreamOut for a document that writes to a FILE*, a TidyBuffer, or
** a caller-supplied TidyOutputSink, with the given encoding id and newline
** setting.
*/
TY_PRIVATE StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, unsigned int newln );
TY_PRIVATE StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, unsigned int newln );
TY_PRIVATE StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, unsigned int newln );

/* NOTE(review): StdErrOutput takes no document; it presumably returns a
** shared stream bound to stderr -- confirm whether ReleaseStreamOut may be
** called on it.
*/
TY_PRIVATE StreamOut* TY_(StdErrOutput)(void);
/* StreamOut* StdOutOutput(void); */
TY_PRIVATE void       TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );

/* Write one character code, or a byte order mark, to a StreamOut. */
TY_PRIVATE void TY_(WriteChar)( unsigned int c, StreamOut* out );
TY_PRIVATE void TY_(outBOM)( StreamOut *out );

/* Conversions between encoding ids and their names.
** NOTE(review): the behaviour for an unknown id or name (NULL / negative
** return?) is not visible here -- confirm before relying on it.
*/
TY_PRIVATE ctmbstr TY_(GetEncodingNameFromTidyId)(unsigned int id);
TY_PRIVATE ctmbstr TY_(GetEncodingOptNameFromTidyId)(unsigned int id);
TY_PRIVATE int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);

/************************
** Misc
************************/

/* character encodings
**
** Small integer ids, one per encoding, numbered consecutively from 0.
** NOTE(review): presumably these are the values stored in the `encoding`
** members of the stream structs and passed as the `encoding` arguments of
** the stream constructors -- confirm. They are plain macros, so they may
** also be tested in #if directives; keep them as #defines.
*/
#define RAW         0
#define ASCII       1
#define LATIN0      2
#define LATIN1      3
#define UTF8        4
#define ISO2022     5
#define MACROMAN    6
#define WIN1252     7
#define IBM858      8
#define UTF16LE     9
#define UTF16BE     10
#define UTF16       11
#define BIG5        12
#define SHIFTJIS    13

/* Function for conversion from Windows-1252 to Unicode.
** Takes a character code c and returns the converted code.
** NOTE(review): the accepted range of c is not visible here -- confirm.
*/
TY_PRIVATE unsigned int TY_(DecodeWin1252)(unsigned int c);

/* Function to convert from MacRoman to Unicode.
** Takes a character code c and returns the converted code.
** NOTE(review): the accepted range of c is not visible here -- confirm.
*/
TY_PRIVATE unsigned int TY_(DecodeMacRoman)(unsigned int c);

#ifdef __cplusplus
}
#endif


/* Use numeric constants as opposed to escape chars (\r, \n)
** to avoid conflicts with Mac compilers that may re-define these.
*/
/* #define '\r'    0xD */
/* #define '\n'    0xA */

/* Platform default for the newline configuration:
** TidyCR when MAC_OS_CLASSIC is defined, otherwise TidyCRLF when _WIN32 or
** OS2_OS is defined, otherwise TidyLF.
*/
#ifdef MAC_OS_CLASSIC
#  define DEFAULT_NL_CONFIG TidyCR
#else
#  if defined(OS2_OS) || defined(_WIN32)
#    define DEFAULT_NL_CONFIG TidyCRLF
#  else
#    define DEFAULT_NL_CONFIG TidyLF
#  endif
#endif


#endif /* __STREAMIO_H__ */
#ifndef __CONFIG_H__
#define __CONFIG_H__

/**************************************************************************//**
 * @file
 * Read configuration files and manage configuration properties.
 *

tidy-html5.c  view on Meta::CPAN

        "<code>&lt;/tr&gt;</code>, <code>&lt;/td&gt;</code>, und "
        "<code>&lt;/th&gt;</code>. "
        "<br/>"
        "Diese Option wird für XML-Ausgaben ignoriert. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
      /* option-name: output-encoding */
      /* Adjacent literals are concatenated: every sentence must end with
         ". " so it does not run into the following fragment or <br/>. */
        TidyOutCharEncoding,          0,
        "Diese Option setzt die Zeichenkodierung für die Tidy-Ausgabe. "
        "Einige Kodierungen beeinflussen wie manche Symbole in Entities "
        "umgewandelt werden, wenn auch die Ausgabe einiger Entities "
        "immer von weiteren Konfigurationseinstellungen abhängt. "
        "<br/>"
        "Mit den Kodierungen <var>ascii</var>, <var>mac</var>, und <var>win1252</var> "
        "werden alle Zeichen mit Werten über 127 als Entities ausgegeben. "
        "<br/>"
        "Wird <var>raw</var> eingestellt, übernimmt Tidy Werte über 127 ohne sie "
        "in Entities umzuwandeln. "
        "<br/>"
        "<var>latin1</var> veranlasst Tidy, Zeichen über 255 als Entities zu schreiben. "
        "<br/>"
        "UTF wie <var>utf8</var> bedingt die Ausgabe in der entsprechenden "
        "UTF-Kodierung. "
        "<br/>"
        "Asiatische Ausgabekodierungen, wie <var>iso2022</var> erzeugen "
        "die adäquate Ausgabe unter der Annahme, dass auch "
        "<code>input-encoding</code> entsprechend gesetzt ist. "
        "<br/>"
        "Tidy ist kein Konverter für Zeichenkodierungen. Wenn Latin und UTF-"
        "Kodierungen auch frei gemischt werden können, ist es doch nicht "
        "möglich, asiatische Kodierungen mit Tidy in Latin umzuwandeln. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
      /* option-name: output-file */
        TidyOutFile,                  0,
        "Diese Option bestimmt welche Ausgabedatei von Tidy geschrieben wird. "
        "Normalerweise schreibt Tidy die Ausgabe auf die Standard-Ausgabe <code>stdout</code>."
    },
    {/* Important notes for translators:
        - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
          <br/>.
        - Entities, tags, attributes, etc., should be enclosed in <code></code>.
        - Option values should be enclosed in <var></var>.
        - It's very important that <br/> be self-closing!
        - The strings "Tidy" and "HTML Tidy" are the program name and must not
          be translated. */
      /* The BOM is the single code point U+FEFF in every UTF encoding;
         EF BB BF is only its UTF-8 byte sequence, not a code point. */
      TidyOutputBOM,                0,
        "Diese Option bestimmt, ob Tidy eine Byte-Reihenfolge-Markierung (BOM) an den Anfang der Ausgabe schreiben soll. "
        "Dies betrifft nur UTF-8 und UTF-16 Ausgabekodierungen; das BOM-Zeichen hat den Wert U+FEFF "
        "(in UTF-8 als Bytefolge EF BB BF kodiert). "
        "<br/>"
        "Wenn die Option den Wert <var>auto</var> hat, wird Tidy die BOM nur dann in die Ausgabe schreiben, "
        "wenn sie bereits zu Beginn der Eingabedaten vorhanden war. "
        "<br/>"
        "XML/XHTML-Ausgaben in UTF-16 Kodierung erhalten immer eine BOM. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
      /* option-name: indent-with-tabs */
        TidyPPrintTabs,               0,
        "Diese Option bestimmt, ob Tidy Einrückungen mit Tabulatoren anstelle von "
        "Leerzeichen vornimmt, unter der Annahme, dass auch <code>indent</code> den "
        "Wert <var>yes</var> hat. "
        "<br/>"
        "Setzen Sie den Wert <var>yes</var>, um mit Tabulatoren einzurücken, "
        "anstelle der standardmäßigen Leerzeichen. "
        "<br/>"
        "Stellen Sie die Anzahl der ausgegebenen Tabulatoren nach Einrückungstiefe "
        "mit der Option <code>indent-spaces</code> ein. "
        "Beachten Sie, dass das Setzen der Option <code>indent-with-tabs</code> "
        "zur Folge hat, dass der Standardwert für <code>indent-spaces</code> auf "
        "<var>1</var> zurückgesetzt wird. "
        "<br/>"
        "Beachten Sie auch, dass die Option <code>tab-size</code> steuert, wie "
        "Tabulatoren der Eingabe in Leerzeichen umgewandelt werden. "
        "Setzen Sie hier 0 ein, um die Tabulatoren der Eingabe beizubehalten. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
        TidyPreserveEntities,         0,
        "Diese Option bestimmt, ob Tidy wohlgeformte Entities, wie in der Eingabe vorgefunden, "
        "beibehält. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
      /* option-name: new-pre-tags*/
        TidyPreTags,                  0,
        "Mit dieser Option werden neue Tags eingeführt, die in der exakt gleichen "
        "Weise verarbeitet werden, wie das <code>&lt;pre&gt;</code> Element von HTML. "
        "Der Wert der Option muss eine leerzeichen- oder kommaseparierte Liste von " 
        "Tag-Namen sein. "
        "<br/>"
        "Solange Sie neue Tags nicht deklarieren, verweigert Tidy das Generieren "
        "einer bereinigten Datei, wenn die Eingabe bisher unbekannte Tags enthält."
        "<br/>"
        "Beachten Sie, dass Sie bis auf weiteres noch keine neuen CDATA-Elemente einführen können. "
        "<br/>"

tidy-html5.c  view on Meta::CPAN

        "<code>&lt;/th&gt;</code>. "
        "<br/>"
        "This option is ignored for XML output. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
        TidyOutCharEncoding,          0,
        "This option specifies the character encoding Tidy uses for "
        "output. Some of the output encodings affect whether or not "
        "some characters are translated to entities, although in all "
        "cases, some entities will be written according to other Tidy "
        "configuration options. "
        "<br/>"
        "For <var>ascii</var>, <var>mac</var>, and <var>win1252</var> "
        "output encodings, entities will be used for all characters "
        "with values over 127. "
        "<br/>"
        "For <var>raw</var> output, Tidy will write values above 127 "
        "without translating them to entities. "
        "<br/>"
        "Output using <var>latin1</var> will cause Tidy to write "
        "character values higher than 255 as entities. "
        "<br/>"
        "The UTF family such as <var>utf8</var> will write output "
        "in the respective UTF encoding. "
        "<br/>"
        "Asian output encodings such as <var>iso2022</var> will write "
        "output in the specified encoding, assuming a corresponding "
        "<code>input-encoding</code> was specified. "
        "<br/>"
        "Tidy is not an encoding converter. Although the Latin and UTF "
        "encodings can be mixed freely, it is not possible to convert Asian "
        "encodings to Latin encodings with Tidy. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
        TidyOutFile,                  0,
        "This option specifies the output file Tidy uses for markup. Normally "
        "markup is written to <code>stdout</code>. "
    },
    {/* Important notes for translators:
        - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
          <br/>.
        - Entities, tags, attributes, etc., should be enclosed in <code></code>.
        - Option values should be enclosed in <var></var>.
        - It's very important that <br/> be self-closing!
        - The strings "Tidy" and "HTML Tidy" are the program name and must not
          be translated. */
      TidyOutputBOM,                0,
        "This option specifies if Tidy should write a Unicode Byte Order Mark "
        "character (BOM; also known as Zero Width No-Break Space; has value of "
        "U+FEFF) to the beginning of the output, and only applies to UTF-8 and "
        "UTF-16 output encodings. "
        "<br/>"
        "If set to <var>auto</var> this option causes Tidy to write a BOM to "
        "the output only if a BOM was present at the beginning of the input. "
        "<br/>"
        "A BOM is always written for XML/XHTML output using UTF-16 output "
        "encodings. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
        TidyPPrintTabs,               0,
        "This option specifies if Tidy should indent with tabs instead of spaces, "
        "assuming <code>indent</code> is <var>yes</var>. "
        "<br/>"
        "Set it to <var>yes</var> to indent using tabs instead of the default "
        "spaces. "
        "<br/>"
        "Use the option <code>indent-spaces</code> to control the number of tabs "
        "output per level of indent. Note that when <code>indent-with-tabs</code> "
        "is enabled the default value of <code>indent-spaces</code> is reset to "
        "<var>1</var>. "
        "<br/>"
        "Note <code>tab-size</code> controls converting input tabs to spaces. Set "
        "it to zero to retain input tabs. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
        TidyPreserveEntities,         0,
        "This option specifies if Tidy should preserve well-formed entities "
        "as found in the input. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.
      - Entities, tags, attributes, etc., should be enclosed in <code></code>.
      - Option values should be enclosed in <var></var>.
      - It's very important that <br/> be self-closing!
      - The strings "Tidy" and "HTML Tidy" are the program name and must not
      be translated. */
        TidyPreTags,                  0,
        "This option specifies new tags that are to be processed in exactly the "
        "same way as HTML's <code>&lt;pre&gt;</code> element. This option takes a "
        "space or comma separated list of tag names. "
        "<br/>"
        "Unless you declare new tags, Tidy will refuse to generate a tidied file if "
        "the input includes previously unknown tags. "
        "<br/>"
        "Note you cannot as yet add new CDATA elements. "
        "<br/>"
        "This option is ignored in XML mode. "
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
      <br/>.

tidy-html5.c  view on Meta::CPAN

    },
    { TidyMark,                                      0,        
        "Cette option précise si Tidy doit ajouter un élément <code>meta</code> dans l'entête de document, "
        "pour indiquer que le document a été nettoyé par Tidy. <br/>Tidy n'ajoutera pas cet élément "
        "<code>meta</code> s'il existe déjà. "
    },
    { TidyMergeDivs,                                 0,        
        "Cette option peut être utilisée pour modifier le comportement de l'option <code>clean</code>, "
        "lorsqu'elle vaut <var>yes</var>. <br/>Cette option précise si Tidy doit concaténer les éléments "
        "<code>&lt;div&gt;</code> imbriqués, comme <code>&lt;div&gt;&lt;div&gt;...&lt;/div&gt;&lt;/div&gt;</"
        "code>. <br/>Si la valeur vaut <var>auto</var>, les attributs internes du <code>&lt;div&gt;</code> "
        "sont déplacés vers son div externe. Les <code>&lt;div&gt;</code> imbriqués avec des attributs "
        "<code>id</code> ne sont <em>pas</em> concaténés. <br/>Si la valeur vaut <var>yes</var>, les "
        "attributs du <code>&lt;div&gt;</code> interne ne sont pas pris en compte, à l'exception de "
        "<code>class</code> et de <code>style</code>. "
    },
    { TidyMergeEmphasis,                             0,        
        "Cette option précise si Tidy doit concaténer les éléments <code>&lt;b&gt;</code> et <code>&lt;i&gt;"
        "</code>; par exemple, pour le cas <br/><code>&lt;b class=\"rtop-2\"&gt;foo &lt;b class=\"r2-2\"&gt;"
        "bar&lt;/b&gt; baz&lt;/b&gt;</code>, <br/>Tidy produira la sortie <code>&lt;b class=\"rtop-2\"&gt;"
        "foo bar baz&lt;/b&gt;</code>. "
    },
    { TidyMergeSpans,                                0,
        /* French description of the option; refers to <code>clean</code> and
           <code>merge-divs</code>. */
        "Cette option peut être utilisée pour modifier le comportement de <code>clean</code>, si elle vaut "
        "<var>yes</var>.<br/>Cette option précise si Tidy doit concaténer les <code>&lt;span&gt;</code> "
        "imbriqués, comme suit : <code>&lt;span&gt;&lt;span&gt;...&lt;/span&gt;&lt;/span&gt;</code>. <br/"
        ">L'algorithme est le même que celui de <code>merge-divs</code>. "
    },
    { TidyNCR,                                       0, "Cette option précise si Tidy doit autoriser les références numériques de caractères. "                  },
    { TidyNewline,                                   0,
        /* CR and LF are option values named as plain text, like CRLF.
           A '\r' or '\n' escape inside the literal would instead embed a raw
           control character in the displayed description. */
        "La valeur par défaut est appropriée à la plateforme d'exécution de Tidy. <br/>Généralement "
        "<var>CRLF</var> sur PC-DOS, Windows et OS/2; <var>CR</var> sur Classic Mac OS; et <var>LF</var> "
        "partout ailleurs (Linux, Mac OS X, et Unix). "
    },
    { TidyNumEntities,                               0,        
        "Cette option précise si Tidy doit afficher les entités autres que les entités HTML intégrées "
        "(<code>&amp;amp;</code>, <code>&amp;lt;</code>, <code>&amp;gt;</code>, et <code>&amp;quot;</code>) "
        "dans la forme numérique plutôt que dans la forme nommée. <br/>Seules les entités compatibles avec "
        "la déclaration DOCTYPE générée sont utilisées. <br/>Les entités qui peuvent être représentées dans "
        "l'encodage de sortie sont traduites avec leur correspondance. "
    },
    { TidyOmitOptionalTags,                          0,
        /* French description of the option; the whole text, including
           conjunctions between the tag lists, is in French. */
        "Cette option précise si Tidy doit omettre les balises optionnelles ouvrantes et fermantes, "
        "lorsqu'il produit une sortie. <br/>Activer cette option entraîne l'omission en sortie de toutes les "
        "balises correspondantes aux éléments <code>&lt;html&gt;</code>, <code>&lt;head&gt;</code>, et "
        "<code>&lt;body&gt;</code>, de même que les balises de fermeture comme <code>&lt;/p&gt;</code>, "
        "<code>&lt;/li&gt;</code>, <code>&lt;/dt&gt;</code>, <code>&lt;/dd&gt;</code>, <code>&lt;/option&gt;"
        "</code>, <code>&lt;/tr&gt;</code>, <code>&lt;/td&gt;</code>, et <code>&lt;/th&gt;</code>. <br/"
        ">Cette option est ignorée pour la sortie XML. "
    },
    { TidyOutCharEncoding,                           0,        
        "Cette option précise l'encodage de caractère utilisé par Tidy pour la sortie. <br/>Notez que cela "
        "peut être différent de <code>input-encoding</code> seulement pour les encodages latins (<var>ascii</"
        "var>, <var>latin0</var>, <var>latin1</var>, <var>mac</var>, <var>win1252</var>, <var>ibm858</var>)."
        "<br/>Voir <code>char-encoding</code> pour plus d'informations."
    },
    { TidyOutFile,                                   0,        
        "Cette option précise le fichier de sortie que Tidy utilise pour la structure de balises. En "
        "principe la structure est écrite vers <code>stdout</code>. "
    },
    { TidyOutputBOM,                                 0,        
        "Cette option précise si Tidy doit écrire un caractère indicateur d'ordre des octets (Unicode Byte "
        "Order Mark ou BOM; connu aussi sous Zero Width No-Break Space; a la valeur de U+FEFF) au début de "
        "la sortie, ce qui ne s'applique qu'aux sorties encodées en UTF-8 et UTF-16. <br/>Si l'option vaut "
        "<var>auto</var>, Tidy écrira un BOM vers la sortie seulement si un BOM était présent au début de "
        "l'entrée. <br/>Un BOM est toujours écrit pour la sortie XML/XHTML des sorties encodées en UTF-16. "
    },
    { TidyPPrintTabs,                                0,
        /* Line breaks are always spelled exactly "<br/>" (no space), as in
           every other description in these tables.
           NOTE(review): the description post-processor presumably matches
           the tag text literally -- confirm. */
        "Cette option précise si tidy doit indenter avec des tabulations plutôt que des espaces, en "
        "supposant que <code>indent</code> vaut <var>yes</var>. <br/>Définir cette option à <var>yes</var> "
        "indente avec des tabulations plutôt que des espaces, valeur par défaut. <br/>Utilisez l'option "
        "<code>indent-spaces</code> pour redéfinir le nombre de tabulations en sortie par niveau "
        "d'indentation. Notez que lorsque <code>indent-with-tabs</code> est activé, la valeur par défaut de "
        "<code>indent-spaces</code> est réinitialisée à <var>1</var>. <br/>Notez que <code>tab-size</code> "
        "contrôle la conversion des tabulations d'entrée en des espaces de sortie. Définissez-la à zéro pour "
        "conserver les tabulations en entrée."
    },
    { TidyPreserveEntities,                          0, "Cette option précise si Tidy doit préserver les entités bien formées telles que trouvées en entrée. "   },
    { TidyPreTags,                                   0,        
        "Cette option précise les nouvelles balises qui doivent être traitées exactement de la même façon "
        "que l'élément HTML <code>&lt;pre&gt;</code>. Cette option prend une liste de nom de balises "
        "séparées par un espace ou une virgule. <br/>Sauf si vous déclarez de nouvelles balises, Tidy "
        "refusera de générer un fichier Tidy en sortie si l'entrée inclut préalablement des balises "
        "inconnues. <br/>Notez que vous ne pouvez encore ajouter de nouveaux éléments CDATA. <br/>Cette "
        "option est ignorée avec le mode XML. "
    },
    { TidyPunctWrap,                                 0,        
        "Cette option précise si Tidy doit passer à la ligne après certains caractères de ponctuation "
        "Unicode ou chinois."
    },
    { TidyQuiet,                                     0,        
        "Cette option précise si Tidy doit afficher le résumé du nombre des erreurs et avertissements, ou "
        "les messages de bienvenue et d'information."
    },
    { TidyQuoteAmpersand,                            0,        
        "Cette option précise si Tidy doit afficher en sortie les caractères <code>&amp;</code> en les "
        "écrivant <code>&amp;amp;</code>. "
    },
    { TidyQuoteMarks,                                0,        
        "Cette option précise si Tidy doit afficher les caractères <code>&quot;</code> comme <code>&amp;quot;"
        "</code> comme préféré par certains environnements d'édition. <br/>Le caractère apostrophe <code>'</"
        "code> est écrit <code>&amp;#39;</code> car de nombreux navigateurs webs ne supportent pas encore "
        "l'écriture <code>&amp;apos;</code>. "
    },
    { TidyQuoteNbsp,                                 0,        
        "Cette option précise si Tidy doit afficher les espaces insécables en tant qu'entités, plutôt qu'en "
        "utilisant la valeur de caractère unicode 160 (décimale). "
    },
    { TidyReplaceColor,                              0,        
        "Cette option précise si Tidy doit remplacer les valeurs numériques dans les attributs de couleur "
        "par les noms de couleurs HTML/XHTML lorsque définies, par exemple en remplaçant <code>#ffffff</"
        "code> par <code>white</code>. "
    },
    { TidyShowErrors,                                0,        
        "Cette option précise le nombre utilisé par Tidy pour déterminer si les erreurs suivantes doivent "
        "être montrées. Si la valeur est <var>0</var>, aucune erreur n'est affichée. "
    },
    { TidyShowInfo,                                  0, "Cette option précise si Tidy doit afficher les messages de niveau info-level."                          },
    { TidyShowMarkup,                                0,        
        "Cette option précise si Tidy doit générer un affichage embelli de la structure de balises. Notez "
        "que Tidy ne générera pas un affichage embelli s'il trouve des erreurs significatives (voir "
        "<code>force-output</code>). "
    },
    { TidyShowWarnings,                              0,        
        "Cette option précise si Tidy doit supprimer les avertissements. Cela peut être utile lorsque "
        "quelques erreurs sont cachées dans une masse d'avertissements. "

tidy-html5.c  view on Meta::CPAN

        "especifica se o Tidy deve mesclar <code>&lt;div&gt;</code> aninhados, como "
        "<code>&lt;div&gt;&lt;div&gt;...&lt;/div&gt;&lt;/div&gt;</code>. <br/>Se "
        "configurado para <var>auto</var>, os atributos do <code>&lt;div&gt;</code> "
        "internos são movidos para o externo. <code>&lt;div&gt;</code> aninhados com "
        "os atributos <code>id</code> <em>não</em> são mesclados. <br/>Se configurado "
        "para <var>yes</var>, os atributos de <code>&lt;div&gt;</code> interno são "
        "descartados com a exceção de <code>class</code> e <code>style</code>. "
    },
    { TidyMergeEmphasis,                             0,        
        "Esta opção especifica se o Tidy deve mesclar elementos <code>&lt;b&gt;</code>"
        " e <code>&lt;i&gt;</code> aninhados; por exemplo, para o caso de "
        "<br/><code>&lt;b class=\"rtop-2\"&gt;foo &lt;b class=\"r2-2\""
        "&gt;bar&lt;/b&gt; baz&lt;/b&gt;</code>, <br/>Tidy vai emitir <code>&lt;b "
        "class=\"rtop-2\"&gt;foo bar baz&lt;/b&gt;</code>. "
    },
    { TidyMergeSpans,                                0,        
        "Esta opção pode ser usada para modificar o comportamento de "
        "<code>clean</code> quando configurado para <var>yes</var>. <br/>Esta opção "
        "especifica se o Tidy deve mesclar <code>&lt;span&gt;</code> aninhados como "
        "<code>&lt;span&gt;&lt;span&gt;...&lt;/span&gt;&lt;/span&gt;</code>. <br/>O "
        "algoritmo é idêntico àquele usado por <code>merge-divs</code>. "
    },
    { TidyNCR,                                       0,        
        "Esta opção especifica se o Tidy deve permitir referências de caracteres "
        "numéricos. "
    },
    { TidyNewline,                                   0,
        /* CR and LF are named as plain text, like CRLF.
           A '\r' or '\n' escape inside the literal would instead embed a raw
           control character in the displayed description. */
        "O padrão é apropriado para a plataforma atual. <br/>Geralmente, CRLF no PC-"
        "DOS, Windows e OS/2; CR no Mac OS Clássico; e LF nos demais (Linux, Mac OS X "
        "e Unix). "
    },
    { TidyNumEntities,                               0,        
        "Esta opção especifica se o Tidy deve produzir entidades diferentes das "
        "entidades HTML integradas (<code>&amp;amp;</code>, <code>&amp;lt;</code>, "
        "<code>&amp;gt;</code> e <code>&amp;quot;</code>) no formulário numérico em "
        "vez do formulário nomeado. <br/>Somente são usadas entidades compatíveis com "
        "a declaração DOCTYPE gerada. <br/>As entidades que podem ser representadas "
        "na codificação de saída são traduzidas correspondentemente. "
    },
    { TidyOmitOptionalTags,                          0,        
        "Esta opção especifica se o Tidy deve omitir tags de início e de fim "
        "opcionais ao gerar saída. <br/>Configurar essa opção causa todas as tags "
        "para os elementos <code>&lt;html&gt;</code>, <code>&lt;head&gt;</code> e "
        "<code>&lt;body&gt;</code> serem omitidos da saída, assim como tags de saída "
        "como <code>&lt;/p&gt;</code>, <code>&lt;/li&gt;</code>, "
        "<code>&lt;/dt&gt;</code>, <code>&lt;/dd&gt;</code>, "
        "<code>&lt;/option&gt;</code>, <code>&lt;/tr&gt;</code>, "
        "<code>&lt;/td&gt;</code> e <code>&lt;/th&gt;</code>. <br/>Essa opção é "
        "ignorada para saída XML. "
    },
    { TidyOutCharEncoding,                           0,        
        "Esta opção especifica a codificação de caracteres que o Tidy usa para a saída. <br/>Note que "
        "isso só pode ser diferente de <code>input-encoding</code> para codificações latinas "
        "(<var>ascii</var>, <var>latin0</var>, <var>latin1</var>, <var>mac</var>, <var>win1252</var>, "
        "<var>ibm858</var>). <br/>Veja <code>char-encoding</code> para mais informações"
    },
    { TidyOutFile,                                   0,        
        "Essa opção especifica o arquivo de saída que o Tidy usa para marcação. "
        "Normalmente, a marcação é escrita para <code>stdout</code>. "
    },
    { TidyOutputBOM,                                 0,
        /* "Zero Width No-Break Space" is the Unicode character name of
           U+FEFF and is kept untranslated. */
        "Esta opção especifica se o Tidy deve escrever um caractere Unicode de marca "
        "de ordem de byte (BOM, também conhecido como Zero Width No-Break Space, tem "
        "valor de U+FEFF) no início da saída, e aplica-se apenas a codificações de "
        "saída UTF-8 e UTF-16. <br/>Se configurado para <var>auto</var>, esta opção "
        "faz com que Tidy escreva um BOM para a saída somente se um BOM estiver "
        "presente no início da entrada. <br/>Um BOM é sempre escrito para saída "
        "XML/XHTML usando codificações de saída UTF-16. "
    },
    { TidyPPrintTabs,                                0,        
        "Esta opção especifica se o Tidy deve aplicar recuo com tabulações em vez de "
        "espaços, presumindo que <code>indent</code> seja <var>yes</var>. <br/>Defina-"
        "o para <var>yes</var> para recuar usando tabulações em vez dos espaços "
        "padrão. <br/>Use a opção <code>indent-spaces</code> para controlar o número "
        "de saídas de tabulação por nível de recuo. Note que quando <code>indent-with-"
        "tabs</code> está habilitado, o valor padrão de <code>indent-spaces</code> é "
        "reiniciado para <var>1</var>. <br/>Note que <code>tab-size</code> controla a "
        "conversão de tabulações de entrada em espaços. Coloque-o em zero para reter "
        "as tabulações de entrada. "
    },
    { TidyPreserveEntities,                          0,        
        "Esta opção especifica se o Tidy deve preservar entidades bem-formadas como "
        "localizado na entrada. "
    },
    { TidyPreTags,                                   0,        
        "Esta opção especifica novas tags que devem ser processadas exatamente da "
        "mesma forma que o elemento <code>&lt;pre&gt;</code> de HTML. Esta opção "
        "possui uma lista separada por vírgulas ou espaços de nomes de tag. <br/>A "
        "menos que você declare novas tags, o Tidy se recusará a gerar um arquivo "
        "arrumado se a entrada incluir tags anteriormente desconhecidas. <br/>Note "
        "que você ainda não pode adicionar novos elementos CDATA. <br/>Esta opção é "
        "ignorada no modo XML. "
    },
    { TidyPunctWrap,                                 0,        
        "Essa opção especifica se o Tidy deve quebrar linha após alguns Unicode ou "
        "caracteres de pontuação chineses. "
    },
    { TidyQuiet,                                     0,
        /* Portuguese description of the option ("boas-vindas" = welcome). */
        "Essa opção especifica se o Tidy deve emitir o resumo dos números de erros e "
        "avisos, ou as mensagens de boas-vindas ou informacionais. "
    },
    { TidyQuoteAmpersand,                            0,        
        "Essa opção especifica se o Tidy deve emitir caracteres <code>&amp;</code> "
        "sem adornos como <code>&amp;amp;</code>. "
    },
    { TidyQuoteMarks,                                0,
        /* The text is rendered as HTML: &quot; displays the quote character
           itself, while &amp;quot; displays the entity spelling. The second
           occurrence must therefore be double-escaped. */
        "Esta opção especifica se o Tidy deve enviar caracteres <code>&quot;</code> "
        "como <code>&amp;quot;</code> como é preferido por alguns ambientes de edição. "
        "<br/>O caractere do apóstrofe <code>'</code> é escrito como "
        "<code>&amp;#39;</code> porque muitos navegadores web ainda não oferecem "
        "suporte a <code>&amp;apos;</code>. "
    },
    { TidyQuoteNbsp,                                 0,        
        "Esta opção especifica se o Tidy deve produzir caracteres de espaço rígido "
        "como entidades, em vez de como o caractere Unicode de valor 160 (decimal). "
    },
    { TidyReplaceColor,                              0,        
        "Esta opção especifica se o Tidy deve substituir os valores numéricos nos atributos de cor com "
        "nomes de cor HTML/XHTML onde definido, p.ex. substituir <code>#ffffff</code> com <code>white</"
        "code>. "
    },
    { TidyShowErrors,                                0,        
        "Esta opção especifica o número que o Tidy usa para determinar se outros "
        "erros devem ser exibidos. Se configurado para <var>0</var>, nenhum erro será "
        "mostrado. "
    },
    { TidyShowInfo,                                  0,        

tidy-html5.c  view on Meta::CPAN

 *  Change the previous on/off unsigned int flag badForm
 *  to a BIT flag to support other than <form>
 *  errors. This could be extended more...
\*/
/* Individual bits of a bit-flag word.
 * NOTE(review): presumably these are set/tested in TidyDocImpl.badForm
 * (documented there as a bit field) - confirm against the users of the flags. */
#define flg_BadForm     0x00000001
#define flg_BadMain     0x00000002

/* Internal state of one Tidy document: the parsed tree, the configuration
** and declaration tables, the pretty-printer state, the I/O streams, the
** client callbacks and the running message counters.
*/
struct _TidyDocImpl
{
    /* The Document Tree (and backing store buffer) */
    Node                root;       /* This MUST remain the first declared
                                       variable in this structure */
    Lexer*              lexer;

    /* Config + Markup Declarations */
    TidyConfigImpl          config;
    TidyTagImpl             tags;
    TidyAttribImpl          attribs;
    TidyAccessImpl          access;
    TidyMutedMessages       muted;

    /* The Pretty Print buffer */
    TidyPrintImpl       pprint;

    /* I/O: input stream, output stream, error stream, then the callbacks
       registered by the client application */
    StreamIn*                docIn;
    StreamOut*               docOut;
    StreamOut*               errout;
    TidyReportFilter         reportFilter;
    TidyReportCallback       reportCallback;
    TidyMessageCallback      messageCallback;
    TidyOptCallback          pOptCallback;
    TidyConfigCallback       pConfigCallback;
    TidyConfigChangeCallback pConfigChangeCallback;
    TidyPPProgress           progressCallback;

    /* Parse + Repair Results (counters, one per message category) */
    unsigned int                optionErrors;
    unsigned int                errors;
    unsigned int                warnings;
    unsigned int                accessErrors;
    unsigned int                infoMessages;
    unsigned int                docErrors;
    int                 parseStatus;

    unsigned int                badAccess;   /* for accessibility errors */
    unsigned int                badLayout;   /* for bad style errors */
    unsigned int                badChars;    /* for bad char encodings */
    unsigned int                badForm;     /* bit field, for badly placed form tags, or other format errors */
    unsigned int                footnotes;   /* bit field, for other footnotes, until formalized */

    Bool                HTML5Mode;   /* current mode is html5 */
    Bool                xmlDetected; /* true if XML was used/detected */

    /* Memory allocator */
    TidyAllocator*      allocator;

    /* Miscellaneous */
    void*               appData;     /* NOTE(review): presumably an opaque pointer owned by the client - confirm */
    unsigned int                nClassId;
    Bool                inputHadBOM;

    /* This member exists only when the build defines PRESERVE_FILE_TIMES
       to a non-zero value, so the struct layout depends on that setting. */
#if PRESERVE_FILE_TIMES
    struct utimbuf      filetimes;
#endif
    tmbstr              givenDoctype;
};

/** The basic struct for communicating a message within LibTidy. All of the
**  relevant information pertaining to a message can be retrieved with the
**  accessor functions and one of these records.
**
**  Most text fields come in pairs: a "Default" variant in the built-in
**  (default) language and a localized variant.
**  NOTE(review): the tmbstr (non-const) members are presumably strings
**  allocated for, and released with, this record, while the ctmbstr members
**  point at storage owned elsewhere - confirm against the code that creates
**  and releases messages.
*/
struct _TidyMessageImpl
{
    TidyDocImpl         *tidyDoc;     /* document instance this message is attributed to */
    Node                *tidyNode;    /* the node reporting the message, if applicable */
    unsigned int                code;         /* the message code */
    int                 line;         /* the line message applies to */
    int                 column;       /* the column the message applies to */
    TidyReportLevel     level;        /* the severity level of the message */
    Bool                allowMessage; /* indicates whether or not a filter rejected a message */
    Bool                muted;        /* indicates whether or not a configuration mutes this message */

    int                 argcount;    /* the number of arguments */
    struct printfArg*   arguments;   /* the arguments' values and types */

    ctmbstr             messageKey;             /* the message code as a key string */

    ctmbstr             messageFormatDefault;   /* the built-in format string */
    ctmbstr             messageFormat;          /* the localized format string */

    tmbstr              messageDefault;         /* the message, formatted, default language */
    tmbstr              message;                /* the message, formatted, localized */

    tmbstr              messagePosDefault;      /* the position part, default language */
    tmbstr              messagePos;             /* the position part, localized */

    ctmbstr             messagePrefixDefault;   /* the prefix part, default language */
    ctmbstr             messagePrefix;          /* the prefix part, localized */

    tmbstr              messageOutputDefault;   /* the complete string Tidy would output */
    tmbstr              messageOutput;          /* the complete string, localized */
};


/* Conversions between the public handle types and the internal
** implementation types. Each macro is a plain C cast: nothing is checked
** at run time, and a null handle converts to a null pointer.
*/

/* document handle <-> TidyDocImpl */
#define tidyDocToImpl( tdoc )           ((TidyDocImpl*)(tdoc))
#define tidyImplToDoc( doc )            ((TidyDoc)(doc))

/* message handle <-> TidyMessageImpl */
#define tidyMessageToImpl( tmessage )   ((TidyMessageImpl*)(tmessage))
#define tidyImplToMessage( message )    ((TidyMessage)(message))

/* node handle <-> Node */
#define tidyNodeToImpl( tnod )          ((Node*)(tnod))
#define tidyImplToNode( node )          ((TidyNode)(node))

/* attribute handle <-> AttVal */
#define tidyAttrToImpl( tattr )         ((AttVal*)(tattr))
#define tidyImplToAttr( attval )        ((TidyAttr)(attval))

/* option handle <-> TidyOptionImpl (the internal side is const) */
#define tidyOptionToImpl( topt )        ((const TidyOptionImpl*)(topt))
#define tidyImplToOption( option )      ((TidyOption)(option))


tidy-html5.c  view on Meta::CPAN

/* return offset of cc from beginning of s1,
** -1 if not found.
*/
/* TY_PRIVATE int TY_(tmbstrnchr)( ctmbstr s1, unsigned int len1, tmbchar cc ); */

/* Substring search. tmbsubstrn takes an explicit length len1 for s1;
** tmbsubstr takes two strings only.
** NOTE(review): presumably both return a pointer into s1 at the first
** occurrence of s2, or NULL when there is none - confirm in the definitions.
*/
TY_PRIVATE ctmbstr TY_(tmbsubstrn)( ctmbstr s1, unsigned int len1, ctmbstr s2 );
/* TY_PRIVATE ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, unsigned int len1, ctmbstr s2 ); */
TY_PRIVATE ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 );

/* transform string to lower case */
/* NOTE(review): takes a writable string and returns a tmbstr - presumably s
   itself, converted in place; confirm in the definition. */
TY_PRIVATE tmbstr TY_(tmbstrtolower)( tmbstr s );

/* Transform ASCII chars in string to upper case */
TY_PRIVATE tmbstr TY_(tmbstrtoupper)( tmbstr s );

/* TY_PRIVATE Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 ); */

/* printf-style formatting into buffer, which holds count characters, with
** the arguments supplied as a va_list.
** Under GCC-compatible compilers the format attribute marks parameter 3 as
** a printf format string; the trailing 0 says there are no variadic
** arguments to check because they arrive as a va_list.
*/
TY_PRIVATE int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
#ifdef __GNUC__
__attribute__((format(printf, 3, 0)))
#endif
;
/* Variadic counterpart of the function above. Here the attribute also asks
** the compiler to type-check the arguments, starting at parameter 4,
** against the format string in parameter 3.
*/
TY_PRIVATE int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
#ifdef __GNUC__
__attribute__((format(printf, 3, 4)))
#endif
;

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif /* __TMBSTR_H__ */
#ifndef __UTF8_H__
#define __UTF8_H__

/* utf8.h -- convert characters to/from UTF-8

  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

*/

/* #include "tidyplatform.h" */
/* #include "tidybuffio.h" */

/* UTF-8 encoding/decoding support
** Does not convert character "codepoints", i.e. to/from 10646.
*/

/* Decode one UTF-8 sequence into *c. The first byte is passed separately in
** firstByte. The stream reader in this file calls this with
** successorBytes == NULL and an input source in inp, and treats a non-zero
** return value as a decoding error.
** NOTE(review): presumably the remaining bytes come from successorBytes when
** it is non-NULL and from inp otherwise, and *count receives the number of
** bytes in the sequence - confirm in the definition.
*/
TY_PRIVATE int TY_(DecodeUTF8BytesToChar)( unsigned int* c, unsigned int firstByte, ctmbstr successorBytes,
                                TidyInputSource* inp, int* count );

/* Encoding counterpart of the decoder above.
** NOTE(review): presumably writes to encodebuf when it is non-NULL and to
** the sink outp otherwise - confirm in the definition.
*/
TY_PRIVATE int TY_(EncodeCharToUTF8Bytes)( unsigned int c, tmbstr encodebuf,
                                TidyOutputSink* outp, int* count );


/* In-memory helpers working on a string / buffer rather than a stream. */
TY_PRIVATE unsigned int  TY_(GetUTF8)( ctmbstr str, unsigned int *ch );
TY_PRIVATE tmbstr TY_(PutUTF8)( tmbstr buf, unsigned int c );

/* Byte order mark values. BOM detection in this file builds the value to
** compare from the leading input bytes with the first byte in the highest
** position, so UNICODE_BOM_BE matches the bytes FE FF, UNICODE_BOM_LE
** matches FF FE, and UNICODE_BOM_UTF8 matches the three bytes EF BB BF.
*/
#define UNICODE_BOM_BE   0xFEFF   /* big-endian (default) UNICODE BOM */
#define UNICODE_BOM      UNICODE_BOM_BE
#define UNICODE_BOM_LE   0xFFFE   /* little-endian UNICODE BOM */
#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */


/* UTF-16 surrogate predicates */
TY_PRIVATE Bool    TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
TY_PRIVATE Bool    TY_(IsHighSurrogate)( tchar ch );
TY_PRIVATE Bool    TY_(IsLowSurrogate)( tchar ch );

TY_PRIVATE Bool    TY_(IsCombinedChar)( tchar ch );
TY_PRIVATE Bool    TY_(IsValidCombinedChar)( tchar ch );

/* Conversion between a surrogate pair and a single value */
TY_PRIVATE tchar   TY_(CombineSurrogatePair)( tchar high, tchar low );
TY_PRIVATE Bool    TY_(SplitSurrogatePair)( tchar utf16, tchar* high, tchar* low );



#endif /* __UTF8_H__ */
/* version information

  (c) 2007-2015 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

*/

/* Release date string: taken from the RELEASE_DATE macro when the build
   defines it, otherwise a built-in fallback date. */
#ifdef RELEASE_DATE
static const char TY_(release_date)[] = RELEASE_DATE;
#else
static const char TY_(release_date)[] = "2015/01/22";
#endif
/* Library version string: LIBTIDY_VERSION when the build defines it, with
   "." and RC_NUMBER appended when RC_NUMBER is defined as well. Both macros
   must expand to string literals because they are joined by literal
   concatenation. Without LIBTIDY_VERSION the fallback is "5.0.0". */
#ifdef LIBTIDY_VERSION
#ifdef RC_NUMBER
static const char TY_(library_version)[] = LIBTIDY_VERSION "." RC_NUMBER;
#else
static const char TY_(library_version)[] = LIBTIDY_VERSION;
#endif
#else
static const char TY_(library_version)[] = "5.0.0";
#endif

/* eof */
/* access.c -- carry out accessibility checks

  Copyright University of Toronto
  Portions (c) 1998-2009 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.
  
*/


/* #include "tidy-int.h" */
/* #include "access.h" */
/* #include "message.h" */
/* #include "tags.h" */
/* #include "attrs.h" */
/* #include "tmbstr.h" */


/* 
    The accessibility checks to perform depending on user's desire.

    1. priority 1
    2. priority 1 & 2

tidy-html5.c  view on Meta::CPAN

    { TidyBodyOnly,                DD, "show-body-only",              IN, no,              ParsePickList,     &autoBoolPicks      },
    { TidyBreakBeforeBR,           PP, "break-before-br",             BL, no,              ParsePickList,     &boolPicks          },
    { TidyCharEncoding,            CE, "char-encoding",               IN, UTF8,            ParseCharEnc,      &charEncPicks       },
    { TidyCoerceEndTags,           MR, "coerce-endtags",              BL, yes,             ParsePickList,     &boolPicks          },
    { TidyCSSPrefix,               MR, "css-prefix",                  ST, 0,               ParseCSS1Selector, NULL,           "c" },
    { TidyCustomTags,              IR, "new-custom-tags",             ST, 0,               ParseList,         NULL                }, /* 20170309 - Issue #119 */
    { TidyDecorateInferredUL,      MX, "decorate-inferred-ul",        BL, no,              ParsePickList,     &boolPicks          },
    { TidyDoctype,                 DT, "doctype",                     ST, TidyDoctypeAuto, ParseDocType,      &doctypePicks       },
#ifndef DOXYGEN_SHOULD_SKIP_THIS
    { TidyDoctypeMode,             IR, "doctype-mode",                IN, TidyDoctypeAuto, NULL,              &doctypePicks       },
#endif
    { TidyDropEmptyElems,          MC, "drop-empty-elements",         BL, yes,             ParsePickList,     &boolPicks          },
    { TidyDropEmptyParas,          MC, "drop-empty-paras",            BL, yes,             ParsePickList,     &boolPicks          },
    { TidyDropPropAttrs,           MC, "drop-proprietary-attributes", BL, no,              ParsePickList,     &boolPicks          },
    { TidyDuplicateAttrs,          MR, "repeated-attributes",         IN, TidyKeepLast,    ParsePickList,     &repeatAttrPicks    },
    { TidyEmacs,                   DD, "gnu-emacs",                   BL, no,              ParsePickList,     &boolPicks          },
#ifndef DOXYGEN_SHOULD_SKIP_THIS
    { TidyEmacsFile,               IR, "gnu-emacs-file",              ST, 0,               ParseString,       NULL                },
#endif
    { TidyEmptyTags,               MT, "new-empty-tags",              ST, 0,               ParseList,         NULL                },
    { TidyEncloseBlockText,        MR, "enclose-block-text",          BL, no,              ParsePickList,     &boolPicks          },
    { TidyEncloseBodyText,         MR, "enclose-text",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyErrFile,                 IO, "error-file",                  ST, 0,               ParseString,       NULL                },
    { TidyEscapeCdata,             MX, "escape-cdata",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyEscapeScripts,           MR, "escape-scripts",              BL, yes,             ParsePickList,     &boolPicks          }, /* 20160227 - Issue #348 */
    { TidyFixBackslash,            MR, "fix-backslash",               BL, yes,             ParsePickList,     &boolPicks          },
    { TidyFixComments,             MR, "fix-bad-comments",            IN, TidyAutoState,   ParsePickList,     &autoBoolPicks      },
    { TidyFixUri,                  MR, "fix-uri",                     BL, yes,             ParsePickList,     &boolPicks          },
    { TidyForceOutput,             DG, "force-output",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyGDocClean,               MC, "gdoc",                        BL, no,              ParsePickList,     &boolPicks          },
    { TidyHideComments,            MX, "hide-comments",               BL, no,              ParsePickList,     &boolPicks          },
    { TidyHtmlOut,                 DT, "output-html",                 BL, no,              ParsePickList,     &boolPicks          },
    { TidyInCharEncoding,          CE, "input-encoding",              IN, UTF8,            ParseCharEnc,      &charEncPicks       },
    { TidyIndentAttributes,        PP, "indent-attributes",           BL, no,              ParsePickList,     &boolPicks          },
    { TidyIndentCdata,             PP, "indent-cdata",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyIndentContent,           PP, "indent",                      IN, TidyNoState,     ParsePickList,     &autoBoolPicks      },
    { TidyIndentSpaces,            PP, "indent-spaces",               IN, 2,               ParseInt,          NULL                },
    { TidyInlineTags,              MT, "new-inline-tags",             ST, 0,               ParseList,         NULL                },
    { TidyJoinClasses,             MX, "join-classes",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyJoinStyles,              MX, "join-styles",                 BL, yes,             ParsePickList,     &boolPicks          },
    { TidyKeepFileTimes,           IO, "keep-time",                   BL, no,              ParsePickList,     &boolPicks          },
    { TidyKeepTabs,                PP, "keep-tabs",                   BL, no,              ParsePickList,     &boolPicks          }, /* 20171103 - Issue #403 */
    { TidyLiteralAttribs,          MR, "literal-attributes",          BL, no,              ParsePickList,     &boolPicks          },
    { TidyLogicalEmphasis,         MC, "logical-emphasis",            BL, no,              ParsePickList,     &boolPicks          },
    { TidyLowerLiterals,           MR, "lower-literals",              BL, yes,             ParsePickList,     &boolPicks          },
    { TidyMakeBare,                MC, "bare",                        BL, no,              ParsePickList,     &boolPicks          },
    { TidyMakeClean,               MC, "clean",                       BL, no,              ParsePickList,     &boolPicks          },
    { TidyMark,                    PP, "tidy-mark",                   BL, yes,             ParsePickList,     &boolPicks          },
    { TidyMergeDivs,               MC, "merge-divs",                  IN, TidyAutoState,   ParsePickList,     &autoBoolPicks      },
    { TidyMergeEmphasis,           MX, "merge-emphasis",              BL, yes,             ParsePickList,     &boolPicks          },
    { TidyMergeSpans,              MC, "merge-spans",                 IN, TidyAutoState,   ParsePickList,     &autoBoolPicks      },
    { TidyMetaCharset,             DT, "add-meta-charset",            BL, no,              ParsePickList,     &boolPicks          }, /* 20161004 - Issue #456 */
    { TidyMuteReports,             DD, "mute",                        ST, 0,               ParseList,         NULL                },
    { TidyMuteShow,                DD, "mute-id",                     BL, no,              ParsePickList,     &boolPicks          },
    { TidyNCR,                     ME, "ncr",                         BL, yes,             ParsePickList,     &boolPicks          },
    { TidyNewline,                 CE, "newline",                     IN, DLF,             ParsePickList,     &newlinePicks       },
    { TidyNumEntities,             ME, "numeric-entities",            BL, no,              ParsePickList,     &boolPicks          },
    { TidyOmitOptionalTags,        PP, "omit-optional-tags",          BL, no,              ParsePickList,     &boolPicks          },
    { TidyOutCharEncoding,         CE, "output-encoding",             IN, UTF8,            ParseCharEnc,      &charEncPicks       },
    { TidyOutFile,                 IO, "output-file",                 ST, 0,               ParseString,       NULL                },
    { TidyOutputBOM,               CE, "output-bom",                  IN, TidyAutoState,   ParsePickList,     &autoBoolPicks      },
    { TidyPPrintTabs,              PP, "indent-with-tabs",            BL, no,              ParseTabs,         &boolPicks          }, /* 20150515 - Issue #108 */
    { TidyPreserveEntities,        ME, "preserve-entities",           BL, no,              ParsePickList,     &boolPicks          },
    { TidyPreTags,                 MT, "new-pre-tags",                ST, 0,               ParseList,         NULL                },
    { TidyPriorityAttributes,      PP, "priority-attributes",         ST, 0,               ParseList,         NULL                },
    { TidyPunctWrap,               PP, "punctuation-wrap",            BL, no,              ParsePickList,     &boolPicks          },
    { TidyQuiet,                   DD, "quiet",                       BL, no,              ParsePickList,     &boolPicks          },
    { TidyQuoteAmpersand,          ME, "quote-ampersand",             BL, yes,             ParsePickList,     &boolPicks          },
    { TidyQuoteMarks,              ME, "quote-marks",                 BL, no,              ParsePickList,     &boolPicks          },
    { TidyQuoteNbsp,               ME, "quote-nbsp",                  BL, yes,             ParsePickList,     &boolPicks          },
    { TidyReplaceColor,            MX, "replace-color",               BL, no,              ParsePickList,     &boolPicks          },
    { TidyShowErrors,              DD, "show-errors",                 IN, 6,               ParseInt,          NULL                },
    { TidyShowFilename,            DD, "show-filename",               BL, no,              ParsePickList,     &boolPicks          },
    { TidyShowInfo,                DD, "show-info",                   BL, yes,             ParsePickList,     &boolPicks          },
    { TidyShowMarkup,              DD, "markup",                      BL, yes,             ParsePickList,     &boolPicks          },
    { TidyShowMetaChange,          DG, "show-meta-change",            BL, no,              ParsePickList,     &boolPicks          }, /* 20170609 - Issue #456 */
    { TidyShowWarnings,            DD, "show-warnings",               BL, yes,             ParsePickList,     &boolPicks          },
    { TidySkipNested,              MR, "skip-nested",                 BL, yes,             ParsePickList,     &boolPicks          }, /* 1642186 - Issue #65 */
    { TidySortAttributes,          PP, "sort-attributes",             IN, TidySortAttrNone,ParsePickList,     &sorterPicks        },
    { TidyStrictTagsAttr,          MR, "strict-tags-attributes",      BL, no,              ParsePickList,     &boolPicks          }, /* 20160209 - Issue #350 */
    { TidyStyleTags,               MR, "fix-style-tags",              BL, yes,             ParsePickList,     &boolPicks          },
    { TidyTabSize,                 PP, "tab-size",                    IN, 8,               ParseInt,          NULL                },
    { TidyUpperCaseAttrs,          MR, "uppercase-attributes",        IN, TidyUppercaseNo, ParsePickList,     &attributeCasePicks },
    { TidyUpperCaseTags,           MR, "uppercase-tags",              BL, no,              ParsePickList,     &boolPicks          },
    { TidyUseCustomTags,           MR, "custom-tags",                 IN, TidyCustomNo,    ParsePickList,     &customTagsPicks    }, /* 20170309 - Issue #119 */
    { TidyVertSpace,               PP, "vertical-space",              IN, no,              ParsePickList,     &autoBoolPicks      }, /* #228 - tri option */
    { TidyWarnPropAttrs,           DG, "warn-proprietary-attributes", BL, yes,             ParsePickList,     &boolPicks          },
    { TidyWord2000,                MC, "word-2000",                   BL, no,              ParsePickList,     &boolPicks          },
    { TidyWrapAsp,                 PP, "wrap-asp",                    BL, yes,             ParsePickList,     &boolPicks          },
    { TidyWrapAttVals,             PP, "wrap-attributes",             BL, no,              ParsePickList,     &boolPicks          },
    { TidyWrapJste,                PP, "wrap-jste",                   BL, yes,             ParsePickList,     &boolPicks          },
    { TidyWrapLen,                 PP, "wrap",                        IN, 68,              ParseInt,          NULL                },
    { TidyWrapPhp,                 PP, "wrap-php",                    BL, no,              ParsePickList,     &boolPicks          },
    { TidyWrapScriptlets,          PP, "wrap-script-literals",        BL, no,              ParsePickList,     &boolPicks          },
    { TidyWrapSection,             PP, "wrap-sections",               BL, yes,             ParsePickList,     &boolPicks          },
    { TidyWriteBack,               IO, "write-back",                  BL, no,              ParsePickList,     &boolPicks          },
    { TidyXhtmlOut,                DT, "output-xhtml",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyXmlDecl,                 DT, "add-xml-decl",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyXmlOut,                  DT, "output-xml",                  BL, no,              ParsePickList,     &boolPicks          },
    { TidyXmlPIs,                  MR, "assume-xml-procins",          BL, no,              ParsePickList,     &boolPicks          },
    { TidyXmlSpace,                DT, "add-xml-space",               BL, no,              ParsePickList,     &boolPicks          },
    { TidyXmlTags,                 DT, "input-xml",                   BL, no,              ParsePickList,     &boolPicks          },
    { N_TIDY_OPTIONS,              XX, NULL,                          XY, 0,               NULL,              NULL                }
};


/*****************************************************************************
 ** Deleted Options Configuration
 **
 ** Keep track of options that have been removed from Tidy, so that we can
 ** suggest a replacement. When a deleted option is used, client programs
 ** will have the opportunity to consume the option first via the callback,
 ** and if not handled by the callback, will be handled by Tidy, generally
 ** by setting an alternate or new option, in `subDeprecatedOption()`.
 ******************************************************************************/

static const struct {
    ctmbstr name;                /**< name of the deprecated option */
    TidyOptionId replacementId;  /**< Id of the replacement option, or 0 if none. */
} deprecatedOptions[] = {
/*    { "show-body-only", TidyBodyOnly }, */

tidy-html5.c  view on Meta::CPAN



/* Make the configuration self-consistent.
**
** Each step below forces dependent options to agree with the options they
** depend on. The steps run in a fixed order because later steps read option
** values that earlier steps may just have changed.
*/
void TY_(AdjustConfig)( TidyDocImpl* doc )
{
    unsigned int outEnc;

    /* enclosing block text implies enclosing body text */
    if ( cfgBool(doc, TidyEncloseBlockText) )
    {
        TY_(SetOptionBool)( doc, TidyEncloseBodyText, yes );
    }

    /* indentation switched off: use an indent width of zero */
    if ( TidyNoState == cfgAutoBool(doc, TidyIndentContent) )
    {
        TY_(SetOptionInt)( doc, TidyIndentSpaces, 0 );
    }

    /* a wrap length of 0 disables wrapping: substitute a huge margin */
    if ( 0 == cfg(doc, TidyWrapLen) )
    {
        TY_(SetOptionInt)( doc, TidyWrapLen, 0x7FFFFFFF );
    }

    /* Word 2000 needs o:p to be declared as inline */
    if ( cfgBool(doc, TidyWord2000) )
    {
        doc->config.defined_tags |= tagtype_inline;
        TY_(DefineTag)( doc, tagtype_inline, "o:p" );
    }

    /* #480701 XML input wins over XHTML output: clear the XHTML flag */
    if ( cfgBool(doc, TidyXmlTags) )
    {
        TY_(SetOptionBool)( doc, TidyXhtmlOut, no );
    }

    /* XHTML is XML written in lower case */
    if ( cfgBool(doc, TidyXhtmlOut) )
    {
        TY_(SetOptionBool)( doc, TidyXmlOut, yes );
        TY_(SetOptionBool)( doc, TidyUpperCaseTags, no );
        TY_(SetOptionInt)( doc, TidyUpperCaseAttrs, no );
        /* TY_(SetOptionBool)( doc, TidyXmlPIs, yes ); */
    }

    /* if XML in, then XML out */
    if ( cfgBool(doc, TidyXmlTags) )
    {
        TY_(SetOptionBool)( doc, TidyXmlOut, yes );
        TY_(SetOptionBool)( doc, TidyXmlPIs, yes );
    }

    /* #427837 - fix by Dave Raggett 02 Jun 01
    ** generate <?xml version="1.0" encoding="iso-8859-1"?>
    ** if the output character encoding is Latin-1 etc.,
    ** i.e. anything other than ASCII, UTF-8, a UTF-16 flavour or raw.
    */
    outEnc = cfg( doc, TidyOutCharEncoding );
    if ( !( outEnc == ASCII   ||
            outEnc == UTF8    ||
            outEnc == UTF16   ||
            outEnc == UTF16BE ||
            outEnc == UTF16LE ||
            outEnc == RAW ) &&
         cfgBool(doc, TidyXmlOut) )
    {
        TY_(SetOptionBool)( doc, TidyXmlDecl, yes );
    }

    /* XML requires end tags */
    if ( cfgBool(doc, TidyXmlOut) )
    {
        /* XML requires a BOM on output if using UTF-16 encoding */
        outEnc = cfg( doc, TidyOutCharEncoding );
        if ( outEnc == UTF16LE || outEnc == UTF16BE || outEnc == UTF16 )
        {
            TY_(SetOptionInt)( doc, TidyOutputBOM, yes );
        }

        TY_(SetOptionBool)( doc, TidyQuoteAmpersand, yes );
        TY_(SetOptionBool)( doc, TidyOmitOptionalTags, no );
    }
}


/* Helper for ParseList(): registers one list item for a list-valued option
   and appends it to the option's stored string value.

   doc   the document whose configuration is updated
   opt   the list option being extended
   name  the item to add

   The item is first handed to the subsystem matching the option (priority
   attributes, muted messages or user-defined tags). The stored value then
   becomes "<previous value>, <name>", or just <name> when the option had no
   value before. Within Tidy this may also be called directly to add single
   values programmatically.
 */
void TY_(DeclareListItem)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name )
{
    ctmbstr existing = cfgStr( doc, opt->id );
    tmbstr joined = NULL;

    if ( existing != NULL )
    {
        /* both lengths, plus 3: two for the ", " separator and one more */
        unsigned int need = TY_(tmbstrlen)(name) + TY_(tmbstrlen)(existing) + 3;

        joined = TY_(tmbstrndup)( doc->allocator, existing, need );
        TY_(tmbstrcat)( joined, ", " );
        TY_(tmbstrcat)( joined, name );
    }

    /* let the owning subsystem know about the new item */
    if ( opt->id == TidyPriorityAttributes )
    {
        TY_(DefinePriorityAttribute)( doc, name );
    }
    else if ( opt->id == TidyMuteReports )
    {
        TY_(DefineMutedMessage)( doc, opt, name );
    }
    else if ( opt->id == TidyInlineTags ||
              opt->id == TidyBlockTags  ||
              opt->id == TidyEmptyTags  ||
              opt->id == TidyPreTags    ||
              opt->id == TidyCustomTags )
    {
        TY_(DeclareUserTag)( doc, opt, name );
    }

    /* store the joined list when there was a previous value, else the bare item */
    SetOptionValue( doc, opt->id, joined != NULL ? (ctmbstr)joined : name );

    if ( joined != NULL )
        TidyDocFree( doc, joined );
}


/* a space or comma separated list of items */
Bool ParseList( TidyDocImpl* doc, const TidyOptionImpl* option )
{
    TidyConfigImpl* cfg = &doc->config;
    tmbchar buf[1024];
    unsigned int i = 0, nItems = 0;

tidy-html5.c  view on Meta::CPAN

    }
}

/************************
** Source
************************/

static void InitLastPos( StreamIn *in );

/* Allocate and initialise an input stream for doc using the given encoding.
** The record starts zeroed; the position is set to line 1, column 1, the
** ISO 2022 state machine to FSM_ASCII, and a character buffer of
** CHARBUF_SIZE entries is allocated. The source and iotype members are left
** for the caller to fill in.
*/
StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding )
{
    StreamIn *stream = (StreamIn*) TidyDocAlloc( doc, sizeof(StreamIn) );

    TidyClearMemory( stream, sizeof(StreamIn) );

    stream->curline   = 1;
    stream->curcol    = 1;
    stream->encoding  = encoding;
    stream->state     = FSM_ASCII;
    stream->doc       = doc;
    stream->bufsize   = CHARBUF_SIZE;
    stream->allocator = doc->allocator;
    stream->charbuf   = (tchar*)TidyDocAlloc(doc, sizeof(tchar) * stream->bufsize);

    InitLastPos( stream );

    return stream;
}

/* Release an input stream: first its character buffer, then the stream
** record itself, both through the allocator stored in the stream.
*/
void TY_(freeStreamIn)(StreamIn* in)
{
    /* fetch the allocator before the record that holds it is released */
    TidyAllocator *alloc = in->allocator;

    TidyFree(alloc, in->charbuf);
    TidyFree(alloc, in);
}

/* Create an input stream that reads from the stdio file fp.
** Returns NULL, after releasing the stream, when the file source cannot be
** initialised.
*/
StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE *fp, int encoding )
{
    StreamIn *stream = TY_(initStreamIn)( doc, encoding );

    if ( TY_(initFileSource)( doc->allocator, &stream->source, fp ) == 0 )
    {
        stream->iotype = FileIO;
        return stream;
    }

    /* source setup failed: do not hand out a half-built stream */
    TY_(freeStreamIn)( stream );
    return NULL;
}

/* Create an input stream whose source is bound to the TidyBuffer buf. */
StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* buf, int encoding )
{
    StreamIn *stream = TY_(initStreamIn)( doc, encoding );

    tidyInitInputBuffer( &stream->source, buf );
    stream->iotype = BufferIO;

    return stream;
}

/* Create an input stream driven by a caller-supplied input source.
** The TidyInputSource record is copied by value into the stream.
*/
StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding )
{
    StreamIn *stream = TY_(initStreamIn)( doc, encoding );

    stream->source = *source;
    stream->iotype = UserIO;

    return stream;
}

/* Look for a byte order mark at the current position of the input.
**
** Returns UTF16BE, UTF16LE or UTF8 when the matching BOM is found; the BOM
** bytes are then consumed. Returns -1 when there is no BOM or the input
** ends first; every byte read is then pushed back, last byte first.
** When the detected encoding disagrees with in->encoding an
** ENCODING_MISMATCH warning is reported.
*/
int TY_(ReadBOMEncoding)(StreamIn *in)
{
    unsigned int b0, b1, b2;
    unsigned int pair;

    b0 = ReadByte(in);
    if (b0 == EndOfStream)
        return -1;

    b1 = ReadByte( in );
    if (b1 == EndOfStream)
    {
        UngetByte(in, b0);
        return -1;
    }

    /* todo: don't warn about mismatch for auto input encoding */
    /* todo: let the user override the encoding found here */

    /* first two bytes, first byte in the high position */
    pair = (b0 << 8) + b1;

    if ( pair == UNICODE_BOM_BE )
    {
        /* big-endian UTF-16 */
        if ( !(in->encoding == UTF16 || in->encoding == UTF16BE) )
            TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF16BE);

        return UTF16BE; /* return decoded BOM */
    }

    if ( pair == UNICODE_BOM_LE )
    {
        /* little-endian UTF-16 */
        if ( !(in->encoding == UTF16 || in->encoding == UTF16LE) )
            TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF16LE);

        return UTF16LE; /* return decoded BOM */
    }

    /* not UTF-16: a third byte is needed for the UTF-8 signature */
    b2 = ReadByte(in);
    if (b2 != EndOfStream)
    {
        if (((b0 << 16) + (b1 << 8) + b2) == UNICODE_BOM_UTF8)
        {
            /* UTF-8 */
            if (in->encoding != UTF8)
                TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF8);

            return UTF8;
        }

        UngetByte( in, b2 );
    }

    /* no BOM: push the first two bytes back, last byte first */
    UngetByte(in, b1);
    UngetByte(in, b0);

    return -1;
}

/* Reset the saved-column ring: both the current and the first index go
   back to slot 0, which RestoreLastPos treats as "nothing saved". */
static void InitLastPos( StreamIn *in )
{
    in->firstlastpos = in->curlastpos = 0;
}

/* Advance the current index of the saved-column ring by one slot, wrapping
   at LASTPOS_SIZE. If it catches up with the first index, the first index is
   advanced as well. */
static void PopLastPos( StreamIn *in )
{
    const unsigned short advanced =
        (unsigned short)( (in->curlastpos + 1) % LASTPOS_SIZE );

    in->curlastpos = advanced;

    if ( advanced == in->firstlastpos )
    {
        in->firstlastpos =
            (unsigned short)( (in->firstlastpos + 1) % LASTPOS_SIZE );
    }
}

/* Record the current column: move to the next ring slot, then store
   in->curcol there. */
static void SaveLastPos( StreamIn *in )
{
    unsigned short slot;

    PopLastPos( in );

    slot = in->curlastpos;
    in->lastcols[slot] = in->curcol;
}

/* Restore in->curcol from the most recently saved column and step the
   current ring index back by one slot, wrapping from 0 to LASTPOS_SIZE-1.
   When the current index equals the first index, the column is set to 0 and
   the indices are left alone. */
static void RestoreLastPos( StreamIn *in )
{
    const unsigned short slot = in->curlastpos;

    if ( slot == in->firstlastpos )
    {
        in->curcol = 0;
        return;
    }

    in->curcol = in->lastcols[slot];
    in->curlastpos = (unsigned short)( slot == 0 ? LASTPOS_SIZE - 1 : slot - 1 );
}

unsigned int TY_(ReadChar)( StreamIn *in )
{
    unsigned int c = EndOfStream;

    if ( in->pushed )
        return PopChar( in );

    SaveLastPos( in );

    if ( in->tabs > 0 )
    {

tidy-html5.c  view on Meta::CPAN


    if ( in->encoding == UTF8 )
    {
        /* deal with UTF-8 encoded char */

        int err, count = 0;
        
        /* first byte "c" is passed in separately */
        err = TY_(DecodeUTF8BytesToChar)( &n, c, NULL, &in->source, &count );
        if (!err && (n == (unsigned int)EndOfStream) && (count == 1)) /* EOF */
            return EndOfStream;
        else if (err)
        {
            /* set error position just before offending character */
            in->doc->lexer->lines = in->curline;
            in->doc->lexer->columns = in->curcol;

            TY_(ReportEncodingError)(in->doc, INVALID_UTF8, n, no);
            n = 0xFFFD; /* replacement char */
        }
        
        return n;
    }
    
    /*
       This section is suitable for any "multibyte" variable-width 
       character encoding in which a one-byte code is less than
       128, and the first byte of a two-byte code is greater or
       equal to 128. Note that Big5 and ShiftJIS fit into this
       kind, even though their second byte may be less than 128
    */
    if ((in->encoding == BIG5) || (in->encoding == SHIFTJIS))
    {
        if (c < 128)
            return c;
        else if ((in->encoding == SHIFTJIS) && (c >= 0xa1 && c <= 0xdf)) /* 461643 - fix suggested by Rick Cameron 14 Sep 01 */
        {
            /*
              Rick Cameron pointed out that for Shift_JIS, the values from
              0xa1 through 0xdf represent singe-byte characters
              (U+FF61 to U+FF9F - half-shift Katakana)
            */
            return c;
        }
        else
        {
            unsigned int c1 = ReadByte( in );
            if ( EndOfStream == c1 )
                return EndOfStream;
            n = (c << 8) + c1;
            return n;
        }
    }
    else
        n = c;
        
    return n;
}

/* Output a Byte Order Mark if required: only the UTF-8 and UTF-16
   output encodings get one, all others produce no output here. */
void TY_(outBOM)( StreamOut *out )
{
    const int enc = out->encoding;

    if ( enc != UTF8 && enc != UTF16LE && enc != UTF16BE && enc != UTF16 )
        return;

    /* this will take care of encoding the BOM correctly */
    TY_(WriteChar)( UNICODE_BOM, out );
}

/* This is an intermediate fix for various problems; in the long term */
/* the code and data in charsets.c should be used instead.            */
/*                                                                     */
/* Each row ties a Tidy encoding id to a charset name and to the name  */
/* used for that encoding in Tidy options. `name` is NULL for the      */
/* encodings that have no charset name in this table.                  */
static struct _enc2iana
{
    unsigned int id;      /* Tidy encoding id (ASCII, UTF8, ...) */
    ctmbstr name;         /* charset name, or NULL when none is listed */
    ctmbstr tidyOptName;  /* encoding name as spelled in Tidy options */
} const enc2iana[] =
{
  { ASCII,    "us-ascii",     "ascii"   },
  { LATIN0,   "iso-8859-15",  "latin0"  },
  { LATIN1,   "iso-8859-1",   "latin1"  },
  { UTF8,     "utf-8",        "utf8"   },
  { MACROMAN, "macintosh",    "mac"     },
  { WIN1252,  "windows-1252", "win1252" },
  { IBM858,   "ibm00858",     "ibm858"  },
  /* all three UTF-16 ids share the same charset name */
  { UTF16LE,  "utf-16",       "utf16le" },
  { UTF16BE,  "utf-16",       "utf16be" },
  { UTF16,    "utf-16",       "utf16"   },
  { BIG5,     "big5",         "big5"    },
  { SHIFTJIS, "shift_jis",    "shiftjis"},
#ifndef NO_NATIVE_ISO2022_SUPPORT
  { ISO2022,  NULL,           "iso2022" },
#endif
  { RAW,      NULL,           "raw"     }
};

/* Look up the charset name recorded in enc2iana for a Tidy encoding id.
**
** Returns the `name` field of the first table entry whose id matches,
** or NULL when the id is not in the table or its entry has no name.
**
** The scan is bounded by the table size (as in the sibling lookups)
** rather than stopping at the first NULL name, so the result does not
** depend on nameless entries being kept at the end of the table.
*/
ctmbstr TY_(GetEncodingNameFromTidyId)(unsigned int id)
{
    unsigned int i;

    for (i = 0; i < sizeof(enc2iana)/sizeof(enc2iana[0]); ++i)
        if (enc2iana[i].id == id)
            return enc2iana[i].name;

    return NULL;
}

ctmbstr TY_(GetEncodingOptNameFromTidyId)(unsigned int id)
{
    unsigned int i;

    for (i = 0; i < sizeof(enc2iana)/sizeof(enc2iana[0]); ++i)
        if (enc2iana[i].id == id)
            return enc2iana[i].tidyOptName;

    return NULL;
}

int TY_(GetCharEncodingFromOptName)( ctmbstr charenc )
{
    unsigned int i;

    for (i = 0; i < sizeof(enc2iana)/sizeof(enc2iana[0]); ++i)
        if (TY_(tmbstrcasecmp)(charenc, enc2iana[i].tidyOptName) == 0 )
            return enc2iana[i].id;

tidy-html5.c  view on Meta::CPAN

    TidyDocImpl* impl = tidyDocToImpl( tdoc );
    if ( impl )
      return tidyDocRunDiagnostics( impl );
    return -EINVAL;
}

/* Public wrapper around tidyDocReportDoctype.
**
** Returns 0 after reporting, or -EINVAL when `tdoc` does not map to a
** document implementation.
*/
int TIDY_CALL        tidyReportDoctype( TidyDoc tdoc )
{
    TidyDocImpl* impl = tidyDocToImpl( tdoc );

    if ( !impl )
        return -EINVAL;

    tidyDocReportDoctype( impl );
    return 0;
}

/* Workhorse functions.
**
** Parse requires input source, all input config items
** and diagnostic sink to have all been set before calling.
**
** Emit likewise requires that document sink and all
** pretty printing options have been set.
*/
/* Message handed to TidyPanic when the node integrity check fails after parsing */
static ctmbstr integrity = "\nPanic - tree has lost its integrity\n";

/* Parse the input stream `in` into the tree rooted at doc->root.
**
** State left in `doc` by an earlier parse (tag table, anchors, node tree,
** given doctype, lexer) is reset or freed and a new lexer is created.
** When ReadBOMEncoding reports an encoding (a result other than -1) it
** replaces in->encoding and the TidyInCharEncoding option.  The input is
** parsed with ParseXMLDocument when TidyXmlTags is set and with
** ParseDocument otherwise; afterwards the tree is checked with
** CheckNodeIntegrity and TidyPanic is invoked if the check fails.
**
** The config-change callback is cleared for the duration of the call and
** restored before returning.
**
** doc and in must be non-NULL and doc->docIn must be NULL on entry; this
** is asserted before doc is first dereferenced.
**
** Returns the value of tidyDocStatus( doc ).
*/
int         TY_(DocParseStream)( TidyDocImpl* doc, StreamIn* in )
{
    Bool xmlIn;
    TidyConfigChangeCallback callback;
    int bomEnc;

    /* Check the arguments before anything below touches doc */
    assert( doc != NULL && in != NULL );
    assert( doc->docIn == NULL );

    xmlIn = cfgBool( doc, TidyXmlTags );
    callback = doc->pConfigChangeCallback;
    doc->pConfigChangeCallback = NULL;

    doc->docIn = in;

    TY_(ResetTags)(doc);             /* Reset table to html5 mode */
    TY_(TakeConfigSnapshot)( doc );  /* Save config state */
    TY_(AdjustConfig)( doc );        /* Ensure config internal consistency */
    TY_(FreeAnchors)( doc );

    TY_(FreeNode)(doc, &doc->root);
    TidyClearMemory(&doc->root, sizeof(Node));

    if (doc->givenDoctype)
        TidyDocFree(doc, doc->givenDoctype);
    /*\
     *  Issue #186 - FreeNode now depends on the doctype, so the lexer is
     *  needed to determine which hash is to be used; free the lexer last.
    \*/
    TY_(FreeLexer)( doc );
    doc->givenDoctype = NULL;

    doc->lexer = TY_(NewLexer)( doc );
    /* doc->lexer->root = &doc->root; */
    doc->root.line = doc->lexer->lines;
    doc->root.column = doc->lexer->columns;
    doc->inputHadBOM = no;
    doc->xmlDetected = no;

    bomEnc = TY_(ReadBOMEncoding)(in);

    if (bomEnc != -1)
    {
        in->encoding = bomEnc;
        TY_(SetOptionInt)(doc, TidyInCharEncoding, bomEnc);
    }

    /* Tidy doesn't alter the doctype for generic XML docs */
    if ( xmlIn )
    {
        TY_(ParseXMLDocument)( doc );
    }
    else
    {
        doc->warnings = 0;
        TY_(ParseDocument)( doc );
    }

    /* Both parsers must leave a structurally consistent tree behind */
    if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
        TidyPanic( doc->allocator, integrity );

    doc->docIn = NULL;
    doc->pConfigChangeCallback = callback;

    return tidyDocStatus( doc );
}

/* Report the markup version and the warning count for `doc`; when the
** document has errors and TidyForceOutput is off, also emit the
** STRING_NEEDS_INTERVENTION dialogue.
**
** Returns the value of tidyDocStatus( doc ).
*/
int         tidyDocRunDiagnostics( TidyDocImpl* doc )
{
    TY_(ReportMarkupVersion)( doc );
    TY_(ReportNumWarnings)( doc );

    if ( doc->errors > 0 )
    {
        if ( !cfgBool( doc, TidyForceOutput ) )
            TY_(Dialogue)( doc, STRING_NEEDS_INTERVENTION );
    }

    return tidyDocStatus( doc );
}

/* Report the markup version of `doc`; a thin forwarder to
** ReportMarkupVersion.
*/
void         tidyDocReportDoctype( TidyDocImpl* doc )
{
    TY_(ReportMarkupVersion)( doc );
}


/*****************************************************************************
 *  HTML5 STUFF
 *****************************************************************************/
#if 0 && defined(ENABLE_DEBUG_LOG)
extern void show_not_html5(void);
/* -----------------------------
List tags that do not have version HTML5 (HT50|XH50)

acronym applet basefont big center dir font frame frameset isindex
listing noframes plaintext rb rbc rtc strike tt xmp nextid
align bgsound blink comment ilayer layer marquee multicol nobr noembed
nolayer nosave server servlet spacer

Listed total 35 tags that do not have version 393216
   ------------------------------ */

tidy-html5.c  view on Meta::CPAN

        TY_(FixXmlDecl)( doc );

    /* At this point the apparent doctype is going to be as stable as
       it can ever be, so we can start detecting things that shouldn't
       be in this version of HTML
     */
    if (doc->lexer) 
    {
        /*\ 
         *  Issue #429 #426 - These services can only be used
         *  when there is a document loaded, ie a lexer created.
         *  But really should not be calling a Clean and Repair
         *  service with no doc!
        \*/
        if (doc->lexer->versionEmitted & VERS_HTML5)
            TY_(CheckHTML5)( doc, &doc->root );
        TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root );

        if ( !doc->lexer->isvoyager && doc->xmlDetected )
        {
            TY_(Report)(doc, NULL, TY_(FindXmlDecl)(doc), XML_DECLARATION_DETECTED );

        }
    }

    TY_(CleanHead)(doc); /* Is #692 - discard multiple <title> tags */

#if defined(ENABLE_DEBUG_LOG)
    SPRTF("All nodes AFTER clean and repair\n");
    dbg_show_all_nodes( doc, &doc->root, 0  );
#endif

    doc->pConfigChangeCallback = callback;
    return tidyDocStatus( doc );
}

/* Decide whether only the body should be printed.
**
** TidyNoState  -> no
** TidyYesState -> yes
** any other value -> yes only when the document has a body node and
**                    that node is marked implicit; otherwise no.
*/
static
Bool showBodyOnly( TidyDocImpl* doc, TidyTriState bodyOnly )
{
    Node* body;

    if ( bodyOnly == TidyNoState )
        return no;

    if ( bodyOnly == TidyYesState )
        return yes;

    /* Remaining states: let the document itself decide */
    body = TY_(FindBody)( doc );
    if ( body == NULL )
        return no;

    return body->implicit ? yes : no;
}


/* Apply the configured output clean-ups to doc's tree and, when markup
** output is enabled and the document has no errors (or output is
** forced), pretty-print it to `out`.
**
** The config-change callback is cleared for the duration of the call and
** restored before returning.
**
** Returns the value of tidyDocStatus( doc ).
*/
int         tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
{
    /* Read every option first, before the tree is touched */
    const Bool wantMarkup = cfgBool( doc, TidyShowMarkup );
    const Bool force      = cfgBool( doc, TidyForceOutput );
    const TidyTriState bomMode = cfgAutoBool( doc, TidyOutputBOM );
    const Bool alwaysBOM  = ( bomMode == TidyYesState );
    const Bool autoBOM    = ( bomMode == TidyAutoState );
    const Bool asXml      = cfgBool( doc, TidyXmlOut );
    const Bool asXhtml    = cfgBool( doc, TidyXhtmlOut );
    const TidyTriState bodyOnly = cfgAutoBool( doc, TidyBodyOnly );

    const Bool hideComments = cfgBool( doc, TidyHideComments );
    const Bool clean        = cfgBool( doc, TidyMakeClean );
    const Bool asciiOnly    = cfgBool( doc, TidyAsciiChars );
    const Bool bare         = cfgBool( doc, TidyMakeBare );
    const Bool escapeCdata  = cfgBool( doc, TidyEscapeCdata );
    const Bool indentTabs   = cfgBool( doc, TidyPPrintTabs );
    TidyAttrSortStrategy attrOrder = cfg(doc, TidySortAttributes);
    TidyConfigChangeCallback savedCallback = doc->pConfigChangeCallback;

    doc->pConfigChangeCallback = NULL;

    /* Select the indentation character used by the pretty printer */
    if ( !indentTabs )
        TY_(PPrintSpaces)();
    else
        TY_(PPrintTabs)();

    /* Tree clean-ups, in this fixed order */
    if ( escapeCdata )
        TY_(ConvertCDATANodes)( doc, &doc->root );

    if ( hideComments )
        TY_(DropComments)( doc, &doc->root );

    if ( clean )
        TY_(DropFontElements)( doc, &doc->root, NULL );

    if ( bare || (clean && asciiOnly) )
        TY_(DowngradeTypography)( doc, &doc->root );

    if ( !bare )
    {
        TY_(ReplacePreformattedSpaces)( doc, &doc->root );
    }
    else
    {
        /* Note: no longer replaces &nbsp; in */
        /* attribute values / non-text tokens */
        TY_(NormalizeSpaces)( doc->lexer, &doc->root );
    }

    TY_(SortAttributes)( doc, &doc->root, attrOrder );

    if ( wantMarkup && (force || doc->errors == 0) )
    {
        /* Output a Byte Order Mark if required */
        if ( alwaysBOM || (autoBOM && doc->inputHadBOM) )
            TY_(outBOM)( out );

        /* Numeric entities are no longer forced when there is no DOCTYPE:
        ** no DOCTYPE == HTML 3.2, which gives you only the basic character
        ** entities, which are safe in any browser.
        */

        doc->docOut = out;

        if ( !asXml || asXhtml )
        {
            /* HTML / XHTML output: body only, or the whole tree */
            if ( showBodyOnly( doc, bodyOnly ) )
                TY_(PrintBody)( doc );
            else
                TY_(PPrintTree)( doc, NORMAL, 0, &doc->root );
        }
        else
        {
            /* Generic XML output */
            TY_(PPrintXMLTree)( doc, NORMAL, 0, &doc->root );
        }

        TY_(PFlushLine)( doc, 0 );
        doc->docOut = NULL;
    }

    /* @jsd: the config is deliberately NOT reset to its snapshot here;
    ** leaving that out should solve #673 and allow saving of the buffer
    ** multiple times.
    */
    doc->pConfigChangeCallback = savedCallback;

    return tidyDocStatus( doc );
}

/* Tree traversal functions
**
** The big issue here is the degree to which we should mimic
** DOM and/or SAX nodes.
**
** Is it 100% possible (and, if so, how difficult is it) to
** emit SAX events from this API?  If SAX events are possible,
** do they provide 100% of the data needed to build a DOM?
*/

/* Return the root node of the document, or a null node when `tdoc`
** does not map to a document implementation.
*/
TidyNode TIDY_CALL   tidyGetRoot( TidyDoc tdoc )
{
    TidyDocImpl* impl = tidyDocToImpl( tdoc );
    Node* root = impl ? &impl->root : NULL;

    return tidyImplToNode( root );
}

/* Return the node found by FindHTML for this document, or a null node
** when `tdoc` does not map to a document implementation.
*/
TidyNode TIDY_CALL   tidyGetHtml( TidyDoc tdoc )
{
  TidyDocImpl* impl = tidyDocToImpl( tdoc );
  Node* html = NULL;

  if ( impl != NULL )
  {
      html = TY_(FindHTML)( impl );
  }

  return tidyImplToNode( html );
}

TidyNode TIDY_CALL    tidyGetHead( TidyDoc tdoc )
{
  TidyDocImpl* impl = tidyDocToImpl( tdoc );
  Node* node = NULL;
  if ( impl )



( run in 0.590 second using v1.01-cache-2.11-cpan-39bf76dae61 )