main code updates to do HTML5

This commit is contained in:
Geoff McLane 2014-08-03 20:33:29 +02:00
parent 292145c8e2
commit 78c0080eb8
18 changed files with 1999 additions and 1259 deletions

View file

@ -9,6 +9,13 @@
*/ */
#include "tidy.h" #include "tidy.h"
#ifdef _MSC_VER
#include "sprtf.h"
#endif
#ifndef SPRTF
#define SPRTF printf
#endif
static FILE* errout = NULL; /* set to stderr */ static FILE* errout = NULL; /* set to stderr */
/* static FILE* txtout = NULL; */ /* set to stdout */ /* static FILE* txtout = NULL; */ /* set to stdout */
@ -176,6 +183,9 @@ static const CmdOptDesc cmdopt_defs[] = {
{ "-bare", { "-bare",
"strip out smart quotes and em dashes, etc.", "strip out smart quotes and em dashes, etc.",
"bare: yes", CmdOptProcDir, "-b" }, "bare: yes", CmdOptProcDir, "-b" },
{ "-gdoc",
"produce clean version of html exported by google docs",
"gdoc: yes", CmdOptProcDir, "-g" },
{ "-numeric", { "-numeric",
"output numeric rather than named entities", "output numeric rather than named entities",
"numeric-entities: yes", CmdOptProcDir, "-n" }, "numeric-entities: yes", CmdOptProcDir, "-n" },
@ -186,8 +196,8 @@ static const CmdOptDesc cmdopt_defs[] = {
"suppress nonessential output", "suppress nonessential output",
"quiet: yes", CmdOptProcDir, "-q" }, "quiet: yes", CmdOptProcDir, "-q" },
{ "-omit", { "-omit",
"omit optional end tags", "omit optional start tags and end tags",
"hide-endtags: yes", CmdOptProcDir }, "omit-optional-tags: yes", CmdOptProcDir },
{ "-xml", { "-xml",
"specify the input is well formed XML", "specify the input is well formed XML",
"input-xml: yes", CmdOptProcDir }, "input-xml: yes", CmdOptProcDir },
@ -411,14 +421,16 @@ static void help( ctmbstr prog )
{ {
printf( "%s [option...] [file...] [option...] [file...]\n", prog ); printf( "%s [option...] [file...] [option...] [file...]\n", prog );
printf( "Utility to clean up and pretty print HTML/XHTML/XML\n"); printf( "Utility to clean up and pretty print HTML/XHTML/XML\n");
printf( "See http://tidy.sourceforge.net/\n"); printf( "\n");
printf( "This is an HTML5-aware experimental fork of HTML Tidy.\n");
printf( "%s\n", tidyReleaseDate() );
printf( "\n"); printf( "\n");
#ifdef PLATFORM_NAME #ifdef PLATFORM_NAME
printf( "Options for HTML Tidy for %s released on %s:\n", printf( "Options for HTML Tidy for %s:\n", PLATFORM_NAME );
PLATFORM_NAME, tidyReleaseDate() );
#else #else
printf( "Options for HTML Tidy released on %s:\n", tidyReleaseDate() ); printf( "Options for HTML Tidy:\n");
#endif #endif
printf( "\n"); printf( "\n");
@ -429,9 +441,27 @@ static void help( ctmbstr prog )
"to the man page.\n\n"); "to the man page.\n\n");
printf( "Input/Output default to stdin/stdout respectively.\n"); printf( "Input/Output default to stdin/stdout respectively.\n");
printf( "\n");
printf( "Single letter options apart from -f may be combined\n"); printf( "Single letter options apart from -f may be combined\n");
printf( "as in: tidy -f errs.txt -imu foo.html\n"); printf( "as in: tidy -f errs.txt -imu foo.html\n");
printf( "For further info on HTML see http://www.w3.org/MarkUp\n"); printf( "\n");
printf( "For more information on this HTML5-aware experimental fork of Tidy,\n" );
printf( "see http://w3c.github.com/tidy-html5/\n" );
printf( "\n");
printf( "For more information on HTML, see the following:\n" );
printf( "\n");
printf( " HTML: Edition for Web Authors (the latest HTML specification)\n");
printf( " http://dev.w3.org/html5/spec-author-view\n" );
printf( "\n");
printf( " HTML: The Markup Language (an HTML language reference)\n" );
printf( " http://dev.w3.org/html5/markup/\n" );
printf( "\n");
printf( "File bug reports at https://github.com/w3c/tidy-html5/issues/\n" );
printf( "or send questions and comments to html-tidy@w3.org\n" );
printf( "\n");
printf( "Validate your HTML documents using the W3C Nu Markup Validator:\n" );
printf( "\n");
printf( " http://validator.w3.org/nu/" );
printf( "\n"); printf( "\n");
} }
@ -472,6 +502,7 @@ ctmbstr ConfigCategoryName( TidyConfigCategory id )
fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", (int)id); fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", (int)id);
assert(0); assert(0);
abort(); abort();
return "never_here"; /* only for the compiler warning */
} }
/* Description of an option */ /* Description of an option */
@ -898,10 +929,10 @@ static void optionvalues( TidyDoc tdoc )
static void version( void ) static void version( void )
{ {
#ifdef PLATFORM_NAME #ifdef PLATFORM_NAME
printf( "HTML Tidy for %s released on %s\n", printf( "HTML Tidy for HTML5 for %s %s\n",
PLATFORM_NAME, tidyReleaseDate() ); PLATFORM_NAME, tidyReleaseDate() );
#else #else
printf( "HTML Tidy released on %s\n", tidyReleaseDate() ); printf( "HTML Tidy for HTML5 %s\n", tidyReleaseDate() );
#endif #endif
} }
@ -923,6 +954,9 @@ int main( int argc, char** argv )
errout = stderr; /* initialize to stderr */ errout = stderr; /* initialize to stderr */
status = 0; status = 0;
#ifdef _MSC_VER
set_log_file((char *)"temptidy.txt", 0);
#endif
#ifdef TIDY_CONFIG_FILE #ifdef TIDY_CONFIG_FILE
if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) ) if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
@ -977,7 +1011,7 @@ int main( int argc, char** argv )
tidyOptResetToDefault( tdoc, TidyIndentSpaces ); tidyOptResetToDefault( tdoc, TidyIndentSpaces );
} }
else if ( strcasecmp(arg, "omit") == 0 ) else if ( strcasecmp(arg, "omit") == 0 )
tidyOptSetBool( tdoc, TidyHideEndTags, yes ); tidyOptSetBool( tdoc, TidyOmitOptionalTags, yes );
else if ( strcasecmp(arg, "upper") == 0 ) else if ( strcasecmp(arg, "upper") == 0 )
tidyOptSetBool( tdoc, TidyUpperCaseTags, yes ); tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
@ -985,6 +1019,9 @@ int main( int argc, char** argv )
else if ( strcasecmp(arg, "clean") == 0 ) else if ( strcasecmp(arg, "clean") == 0 )
tidyOptSetBool( tdoc, TidyMakeClean, yes ); tidyOptSetBool( tdoc, TidyMakeClean, yes );
else if ( strcasecmp(arg, "gdoc") == 0 )
tidyOptSetBool( tdoc, TidyGDocClean, yes );
else if ( strcasecmp(arg, "bare") == 0 ) else if ( strcasecmp(arg, "bare") == 0 )
tidyOptSetBool( tdoc, TidyMakeBare, yes ); tidyOptSetBool( tdoc, TidyMakeBare, yes );
@ -1202,6 +1239,10 @@ int main( int argc, char** argv )
tidyOptSetBool( tdoc, TidyMakeClean, yes ); tidyOptSetBool( tdoc, TidyMakeClean, yes );
break; break;
case 'g':
tidyOptSetBool( tdoc, TidyGDocClean, yes );
break;
case 'b': case 'b':
tidyOptSetBool( tdoc, TidyMakeBare, yes ); tidyOptSetBool( tdoc, TidyMakeBare, yes );
break; break;
@ -1237,6 +1278,7 @@ int main( int argc, char** argv )
if ( argc > 1 ) if ( argc > 1 )
{ {
htmlfil = argv[1]; htmlfil = argv[1];
SPRTF("Tidying '%s'\n", htmlfil);
if ( tidyOptGetBool(tdoc, TidyEmacs) ) if ( tidyOptGetBool(tdoc, TidyEmacs) )
tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil ); tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
status = tidyParseFile( tdoc, htmlfil ); status = tidyParseFile( tdoc, htmlfil );
@ -1263,10 +1305,17 @@ int main( int argc, char** argv )
else else
{ {
ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile ); ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
if ( outfil ) if ( outfil ) {
status = tidySaveFile( tdoc, outfil ); status = tidySaveFile( tdoc, outfil );
else } else {
#if !defined(NDEBUG) && defined(_MSC_VER)
static char tmp_buf[264];
sprintf(tmp_buf,"%s.html",get_log_file());
status = tidySaveFile( tdoc, tmp_buf );
#else
status = tidySaveStdout( tdoc ); status = tidySaveStdout( tdoc );
#endif
}
} }
} }

View file

@ -937,6 +937,10 @@ TIDY_EXPORT Bool TIDY_CALL tidyNodeIsSTRIKE( TidyNode tnod );
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsU( TidyNode tnod ); TIDY_EXPORT Bool TIDY_CALL tidyNodeIsU( TidyNode tnod );
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod ); TIDY_EXPORT Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod );
/* HTML5 */
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsDATALIST( TidyNode tnod ); // bit like OPTIONS
/** @} End NodeIsElementName group */ /** @} End NodeIsElementName group */
/** @} End NodeAsk group */ /** @} End NodeAsk group */

View file

@ -102,11 +102,14 @@ typedef enum
TidyOutFile, /**< File name to write markup to */ TidyOutFile, /**< File name to write markup to */
TidyWriteBack, /**< If true then output tidied markup */ TidyWriteBack, /**< If true then output tidied markup */
TidyShowMarkup, /**< If false, normal output is suppressed */ TidyShowMarkup, /**< If false, normal output is suppressed */
TidyShowInfo, /**< If true, info-level messages are shown */
TidyShowWarnings, /**< However errors are always shown */ TidyShowWarnings, /**< However errors are always shown */
TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */ TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */
TidyIndentContent, /**< Indent content of appropriate tags */ TidyIndentContent, /**< Indent content of appropriate tags */
/**< "auto" does text/block level content indentation */ /**< "auto" does text/block level content indentation */
TidyHideEndTags, /**< Suppress optional end tags */ TidyCoerceEndTags, /**< Coerce end tags from start tags where probably intended */
TidyOmitOptionalTags,/**< Suppress optional start tags and end tags */
TidyHideEndTags, /**< Legacy name for TidyOmitOptionalTags */
TidyXmlTags, /**< Treat input as XML */ TidyXmlTags, /**< Treat input as XML */
TidyXmlOut, /**< Create output as XML */ TidyXmlOut, /**< Create output as XML */
TidyXhtmlOut, /**< Output extensible HTML */ TidyXhtmlOut, /**< Output extensible HTML */
@ -117,9 +120,11 @@ typedef enum
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */ TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */ TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
TidyMakeClean, /**< Replace presentational clutter by style rules */ TidyMakeClean, /**< Replace presentational clutter by style rules */
TidyGDocClean, /**< Clean up HTML exported from Google Docs */
TidyLogicalEmphasis, /**< Replace i by em and b by strong */ TidyLogicalEmphasis, /**< Replace i by em and b by strong */
TidyDropPropAttrs, /**< Discard proprietary attributes */ TidyDropPropAttrs, /**< Discard proprietary attributes */
TidyDropFontTags, /**< Discard presentation tags */ TidyDropFontTags, /**< Discard presentation tags */
TidyDropEmptyElems, /**< Discard empty elements */
TidyDropEmptyParas, /**< Discard empty p elements */ TidyDropEmptyParas, /**< Discard empty p elements */
TidyFixComments, /**< Fix comments with adjacent hyphens */ TidyFixComments, /**< Fix comments with adjacent hyphens */
TidyBreakBeforeBR, /**< Output newline before <br> or not? */ TidyBreakBeforeBR, /**< Output newline before <br> or not? */
@ -192,6 +197,7 @@ typedef enum
#else #else
TidyPunctWrapNotUsed, TidyPunctWrapNotUsed,
#endif #endif
TidyMergeEmphasis, /**< Merge nested B and I elements */
TidyMergeDivs, /**< Merge multiple DIVs */ TidyMergeDivs, /**< Merge multiple DIVs */
TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */ TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */
TidyPreserveEntities, /**< Preserve entities */ TidyPreserveEntities, /**< Preserve entities */
@ -234,6 +240,7 @@ typedef enum
*/ */
typedef enum typedef enum
{ {
TidyDoctypeHtml5, /**< <!DOCTYPE html> */
TidyDoctypeOmit, /**< Omit DOCTYPE altogether */ TidyDoctypeOmit, /**< Omit DOCTYPE altogether */
TidyDoctypeAuto, /**< Keep DOCTYPE in input. Set version to content */ TidyDoctypeAuto, /**< Keep DOCTYPE in input. Set version to content */
TidyDoctypeStrict, /**< Convert document to HTML 4 strict content model */ TidyDoctypeStrict, /**< Convert document to HTML 4 strict content model */
@ -436,16 +443,20 @@ typedef enum
TidyTag_ARTICLE, TidyTag_ARTICLE,
TidyTag_ASIDE, TidyTag_ASIDE,
TidyTag_AUDIO, TidyTag_AUDIO,
TidyTag_BDI,
TidyTag_CANVAS, TidyTag_CANVAS,
TidyTag_COMMAND, TidyTag_COMMAND,
TidyTag_DATALIST, TidyTag_DATALIST,
TidyTag_DETAILS, TidyTag_DETAILS,
TidyTag_DIALOG,
TidyTag_FIGCAPTION, TidyTag_FIGCAPTION,
TidyTag_FIGURE, TidyTag_FIGURE,
TidyTag_FOOTER, TidyTag_FOOTER,
TidyTag_HEADER, TidyTag_HEADER,
TidyTag_HGROUP, TidyTag_HGROUP,
TidyTag_MAIN,
TidyTag_MARK, TidyTag_MARK,
TidyTag_MENUITEM,
TidyTag_METER, TidyTag_METER,
TidyTag_NAV, TidyTag_NAV,
TidyTag_OUTPUT, TidyTag_OUTPUT,
@ -531,6 +542,7 @@ typedef enum
TidyAttr_HTTP_EQUIV, /**< HTTP_EQUIV= */ TidyAttr_HTTP_EQUIV, /**< HTTP_EQUIV= */
TidyAttr_ID, /**< ID= */ TidyAttr_ID, /**< ID= */
TidyAttr_ISMAP, /**< ISMAP= */ TidyAttr_ISMAP, /**< ISMAP= */
TidyAttr_ITEMPROP, /**< ITEMPROP= */
TidyAttr_LABEL, /**< LABEL= */ TidyAttr_LABEL, /**< LABEL= */
TidyAttr_LANG, /**< LANG= */ TidyAttr_LANG, /**< LANG= */
TidyAttr_LANGUAGE, /**< LANGUAGE= */ TidyAttr_LANGUAGE, /**< LANGUAGE= */

File diff suppressed because it is too large Load diff

View file

@ -125,14 +125,18 @@ extern const AttrVersion TY_(W3CAttrsFor_HGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[]; extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[];
extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[]; extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[];
extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[]; extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[];
extern const AttrVersion TY_(W3CAttrsFor_BDI)[];
extern const AttrVersion TY_(W3CAttrsFor_NAV)[]; extern const AttrVersion TY_(W3CAttrsFor_NAV)[];
extern const AttrVersion TY_(W3CAttrsFor_SECTION)[]; extern const AttrVersion TY_(W3CAttrsFor_SECTION)[];
extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[]; extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[];
extern const AttrVersion TY_(W3CAttrsFor_HEADER)[]; extern const AttrVersion TY_(W3CAttrsFor_HEADER)[];
extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[]; extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[];
extern const AttrVersion TY_(W3CAttrsFor_DIALOG)[];
extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[]; extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[];
extern const AttrVersion TY_(W3CAttrsFor_MAIN)[];
extern const AttrVersion TY_(W3CAttrsFor_MARK)[]; extern const AttrVersion TY_(W3CAttrsFor_MARK)[];
extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[]; extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[];
extern const AttrVersion TY_(W3CAttrsFor_MENUITEM)[];
extern const AttrVersion TY_(W3CAttrsFor_METER)[]; extern const AttrVersion TY_(W3CAttrsFor_METER)[];
extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[]; extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[];
extern const AttrVersion TY_(W3CAttrsFor_TIME)[]; extern const AttrVersion TY_(W3CAttrsFor_TIME)[];
@ -141,5 +145,8 @@ extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[]; extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[];
extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[]; extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[];
extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[]; extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[];
extern const AttrVersion TY_(W3CAttrsFor_EMBED)[];
extern const AttrVersion TY_(W3CAttrsFor_KEYGEN)[];
extern const AttrVersion TY_(W3CAttrsFor_WBR)[];
#endif /* __ATTRDICT_H__ */ #endif /* __ATTRDICT_H__ */

View file

@ -152,6 +152,7 @@ static const Attribute attribute_defs [] =
{ TidyAttr_HTTP_EQUIV, "http-equiv", CH_PCDATA }, /* META */ { TidyAttr_HTTP_EQUIV, "http-equiv", CH_PCDATA }, /* META */
{ TidyAttr_ID, "id", CH_IDDEF }, { TidyAttr_ID, "id", CH_IDDEF },
{ TidyAttr_ISMAP, "ismap", CH_BOOL }, /* IMG */ { TidyAttr_ISMAP, "ismap", CH_BOOL }, /* IMG */
{ TidyAttr_ITEMPROP, "itemprop", CH_PCDATA },
{ TidyAttr_LABEL, "label", CH_PCDATA }, /* OPT, OPTGROUP */ { TidyAttr_LABEL, "label", CH_PCDATA }, /* OPT, OPTGROUP */
{ TidyAttr_LANG, "lang", CH_LANG }, { TidyAttr_LANG, "lang", CH_LANG },
{ TidyAttr_LANGUAGE, "language", CH_PCDATA }, /* SCRIPT */ { TidyAttr_LANGUAGE, "language", CH_PCDATA }, /* SCRIPT */
@ -253,7 +254,7 @@ static const Attribute attribute_defs [] =
{ TidyAttr_SDASUFF, "sdasuff", CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ { TidyAttr_SDASUFF, "sdasuff", CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
{ TidyAttr_URN, "urn", CH_PCDATA }, /* for <a>, never implemented */ { TidyAttr_URN, "urn", CH_PCDATA }, /* for <a>, never implemented */
/* "HTML5" */ /* HTML5 */
{ TidyAttr_ASYNC, "async", CH_PCDATA }, { TidyAttr_ASYNC, "async", CH_PCDATA },
{ TidyAttr_AUTOCOMPLETE, "autocomplete", CH_PCDATA }, { TidyAttr_AUTOCOMPLETE, "autocomplete", CH_PCDATA },
{ TidyAttr_AUTOFOCUS, "autofocus", CH_PCDATA }, { TidyAttr_AUTOFOCUS, "autofocus", CH_PCDATA },
@ -362,7 +363,7 @@ static uint AttributeVersions(Node* node, AttVal* attval)
{ {
uint i; uint i;
/* "HTML5" data-* attributes */ /* HTML5 data-* attributes */
if (attval && attval->attribute) if (attval && attval->attribute)
if (TY_(tmbstrncmp)(attval->attribute, "data-", 5) == 0) if (TY_(tmbstrncmp)(attval->attribute, "data-", 5) == 0)
return (XH50 | HT50); return (XH50 | HT50);
@ -744,6 +745,27 @@ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
return attr; return attr;
} }
/* Unlinks and frees the first attribute of node whose name matches
** the given name, as compared by TY_(tmbstrcmp).  Attributes that
** have no name are passed over.  At most one attribute is removed;
** the list is left untouched when nothing matches.
*/
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name )
{
    /* link always addresses the pointer that refers to the attribute
    ** under inspection: first the list head, afterwards the next
    ** field of the preceding attribute.
    */
    AttVal **link = &node->attributes;

    while ( *link != NULL )
    {
        AttVal *cur = *link;

        if ( cur->attribute != NULL &&
             TY_(tmbstrcmp)(cur->attribute, name) == 0 )
        {
            /* splice the match out of the list before releasing it */
            *link = cur->next;
            TY_(FreeAttribute)( doc, cur );
            return;
        }

        link = &cur->next;
    }
}
AttVal* TY_(AddAttribute)( TidyDocImpl* doc, AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value ) Node *node, ctmbstr name, ctmbstr value )
{ {
@ -1360,11 +1382,8 @@ Bool TY_(IsValidHTMLID)(ctmbstr id)
if (!s) if (!s)
return no; return no;
if (!TY_(IsLetter)(*s++))
return no;
while (*s) while (*s)
if (!TY_(IsNamechar)(*s++)) if (TY_(IsHTMLSpace)(*s++))
return no; return no;
return yes; return yes;
@ -1807,9 +1826,11 @@ void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval)
/* checks type attribute */ /* checks type attribute */
void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval) void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval)
{ {
ctmbstr const valuesINPUT[] = {"text", "password", "checkbox", "radio", ctmbstr const valuesINPUT[] = {
"submit", "reset", "file", "hidden", "text", "password", "checkbox", "radio", "submit", "reset", "file",
"image", "button", NULL}; "hidden", "image", "button", "color", "date", "datetime",
"datetime-local", "email", "month", "number", "range", "search",
"tel", "time", "url", "week", NULL};
ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL}; ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL};
ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL}; ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL};
ctmbstr const valuesOL[] = {"1", "a", "i", NULL}; ctmbstr const valuesOL[] = {"1", "a", "i", NULL};

View file

@ -81,6 +81,8 @@ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name ); AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
AttVal* TY_(AddAttribute)( TidyDocImpl* doc, AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value ); Node *node, ctmbstr name, ctmbstr value );
@ -217,6 +219,7 @@ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
#define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV ) #define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV )
#define attrIsID(av) AttrIsId( av, TidyAttr_ID ) #define attrIsID(av) AttrIsId( av, TidyAttr_ID )
#define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP ) #define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP )
#define attrIsITEMPROP(av) AttrIsId( av, TidyAttr_ITEMPROP )
#define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL ) #define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL )
#define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG ) #define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG )
#define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE ) #define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE )

View file

@ -4,9 +4,6 @@
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/
/*
config files associate a property name with a value. config files associate a property name with a value.
// comments can start at the beginning of a line // comments can start at the beginning of a line
@ -130,6 +127,7 @@ static const ctmbstr newlinePicks[] =
static const ctmbstr doctypePicks[] = static const ctmbstr doctypePicks[] =
{ {
"html5",
"omit", "omit",
"auto", "auto",
"strict", "strict",
@ -200,7 +198,7 @@ static ParseProperty ParseSorter;
static ParseProperty ParseCharEnc; static ParseProperty ParseCharEnc;
static ParseProperty ParseNewline; static ParseProperty ParseNewline;
/* omit | auto | strict | loose | <fpi> */ /* html5 | omit | auto | strict | loose | <fpi> */
static ParseProperty ParseDocType; static ParseProperty ParseDocType;
/* keep-first or keep-last? */ /* keep-first or keep-last? */
@ -213,9 +211,9 @@ static const TidyOptionImpl option_defs[] =
{ TidyIndentSpaces, PP, "indent-spaces", IN, 2, ParseInt, NULL }, { TidyIndentSpaces, PP, "indent-spaces", IN, 2, ParseInt, NULL },
{ TidyWrapLen, PP, "wrap", IN, 68, ParseInt, NULL }, { TidyWrapLen, PP, "wrap", IN, 68, ParseInt, NULL },
{ TidyTabSize, PP, "tab-size", IN, 8, ParseInt, NULL }, { TidyTabSize, PP, "tab-size", IN, 8, ParseInt, NULL },
{ TidyCharEncoding, CE, "char-encoding", IN, ASCII, ParseCharEnc, charEncPicks }, { TidyCharEncoding, CE, "char-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
{ TidyInCharEncoding, CE, "input-encoding", IN, LATIN1, ParseCharEnc, charEncPicks }, { TidyInCharEncoding, CE, "input-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
{ TidyOutCharEncoding, CE, "output-encoding", IN, ASCII, ParseCharEnc, charEncPicks }, { TidyOutCharEncoding, CE, "output-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
{ TidyNewline, CE, "newline", IN, DLF, ParseNewline, newlinePicks }, { TidyNewline, CE, "newline", IN, DLF, ParseNewline, newlinePicks },
{ TidyDoctypeMode, MU, "doctype-mode", IN, TidyDoctypeAuto, NULL, doctypePicks }, { TidyDoctypeMode, MU, "doctype-mode", IN, TidyDoctypeAuto, NULL, doctypePicks },
{ TidyDoctype, MU, "doctype", ST, 0, ParseDocType, doctypePicks }, { TidyDoctype, MU, "doctype", ST, 0, ParseDocType, doctypePicks },
@ -229,9 +227,12 @@ static const TidyOptionImpl option_defs[] =
{ TidyOutFile, MS, "output-file", ST, 0, ParseString, NULL }, { TidyOutFile, MS, "output-file", ST, 0, ParseString, NULL },
{ TidyWriteBack, MS, "write-back", BL, no, ParseBool, boolPicks }, { TidyWriteBack, MS, "write-back", BL, no, ParseBool, boolPicks },
{ TidyShowMarkup, PP, "markup", BL, yes, ParseBool, boolPicks }, { TidyShowMarkup, PP, "markup", BL, yes, ParseBool, boolPicks },
{ TidyShowInfo, DG, "show-info", BL, yes, ParseBool, boolPicks },
{ TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks }, { TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks },
{ TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks }, { TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks },
{ TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks }, { TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks },
{ TidyCoerceEndTags, MU, "coerce-endtags", BL, yes, ParseBool, boolPicks },
{ TidyOmitOptionalTags, MU, "omit-optional-tags", BL, no, ParseBool, boolPicks },
{ TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks }, { TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks },
{ TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks }, { TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks },
{ TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks }, { TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks },
@ -242,9 +243,11 @@ static const TidyOptionImpl option_defs[] =
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks }, { TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks }, { TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks }, { TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
{ TidyGDocClean, MU, "gdoc", BL, no, ParseBool, boolPicks },
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks }, { TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks }, { TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks }, { TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
{ TidyDropEmptyElems, MU, "drop-empty-elements", BL, yes, ParseBool, boolPicks },
{ TidyDropEmptyParas, MU, "drop-empty-paras", BL, yes, ParseBool, boolPicks }, { TidyDropEmptyParas, MU, "drop-empty-paras", BL, yes, ParseBool, boolPicks },
{ TidyFixComments, MU, "fix-bad-comments", BL, yes, ParseBool, boolPicks }, { TidyFixComments, MU, "fix-bad-comments", BL, yes, ParseBool, boolPicks },
{ TidyBreakBeforeBR, PP, "break-before-br", BL, no, ParseBool, boolPicks }, { TidyBreakBeforeBR, PP, "break-before-br", BL, no, ParseBool, boolPicks },
@ -303,6 +306,7 @@ static const TidyOptionImpl option_defs[] =
#if SUPPORT_ASIAN_ENCODINGS #if SUPPORT_ASIAN_ENCODINGS
{ TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks }, { TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks },
#endif #endif
{ TidyMergeEmphasis, MU, "merge-emphasis", BL, yes, ParseBool, boolPicks },
{ TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks }, { TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
{ TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks }, { TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks },
{ TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks }, { TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks },
@ -1425,7 +1429,7 @@ ctmbstr TY_(CharEncodingOptName)( int encoding )
} }
/* /*
doctype: omit | auto | strict | loose | <fpi> doctype: html5 | omit | auto | strict | loose | <fpi>
where the fpi is a string similar to where the fpi is a string similar to
@ -1462,6 +1466,8 @@ Bool ParseDocType( TidyDocImpl* doc, const TidyOptionImpl* option )
if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 ) if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 )
dtmode = TidyDoctypeAuto; dtmode = TidyDoctypeAuto;
else if ( TY_(tmbstrcasecmp)(buf, "html5") == 0 )
dtmode = TidyDoctypeHtml5;
else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 ) else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 )
dtmode = TidyDoctypeOmit; dtmode = TidyDoctypeOmit;
else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 ) else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 )

View file

@ -39,6 +39,13 @@
#include "clean.h" #include "clean.h"
#include "utf8.h" #include "utf8.h"
#include "streamio.h" #include "streamio.h"
#ifdef _MSC_VER
#include "sprtf.h"
#endif
#ifndef SPRTF
#define SPRTF printf
#endif
/* Forward references /* Forward references
*/ */
@ -113,6 +120,9 @@ int TY_(HTMLVersion)(TidyDocImpl* doc)
!cfgBool(doc, TidyHtmlOut); !cfgBool(doc, TidyHtmlOut);
Bool html4 = dtmode == TidyDoctypeStrict || dtmode == TidyDoctypeLoose || VERS_FROM40 & dtver; Bool html4 = dtmode == TidyDoctypeStrict || dtmode == TidyDoctypeLoose || VERS_FROM40 & dtver;
if (xhtml && dtver == VERS_UNKNOWN) return XH50;
if (dtver == VERS_UNKNOWN) return HT50;
for (i = 0; W3C_Doctypes[i].name; ++i) for (i = 0; W3C_Doctypes[i].name; ++i)
{ {
if ((xhtml && !(VERS_XHTML & W3C_Doctypes[i].vers)) || if ((xhtml && !(VERS_XHTML & W3C_Doctypes[i].vers)) ||
@ -171,7 +181,7 @@ static uint GetVersFromFPI(ctmbstr fpi)
uint i; uint i;
for (i = 0; W3C_Doctypes[i].name; ++i) for (i = 0; W3C_Doctypes[i].name; ++i)
if (TY_(tmbstrcasecmp)(W3C_Doctypes[i].fpi, fpi) == 0) if (W3C_Doctypes[i].fpi != NULL && TY_(tmbstrcasecmp)(W3C_Doctypes[i].fpi, fpi) == 0)
return W3C_Doctypes[i].vers; return W3C_Doctypes[i].vers;
return 0; return 0;
@ -224,6 +234,11 @@ Bool TY_(IsLetter)(uint c)
return (map & letter)!=0; return (map & letter)!=0;
} }
/* Tests whether c is one of exactly five code points:
** U+0009, U+000A, U+000C, U+000D or U+0020.
*/
Bool TY_(IsHTMLSpace)(uint c)
{
    return c == 0x009      /* character tabulation */
        || c == 0x00a      /* line feed */
        || c == 0x00c      /* form feed */
        || c == 0x00d      /* carriage return */
        || c == 0x020;     /* space */
}
Bool TY_(IsNamechar)(uint c) Bool TY_(IsNamechar)(uint c)
{ {
uint map = MAP(c); uint map = MAP(c);
@ -1393,10 +1408,10 @@ Bool TY_(AddGenerator)( TidyDocImpl* doc )
if (head) if (head)
{ {
#ifdef PLATFORM_NAME #ifdef PLATFORM_NAME
TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for "PLATFORM_NAME" (vers %s), see www.w3.org", TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for HTML5 (experimental) for "PLATFORM_NAME" %s",
tidyReleaseDate()); tidyReleaseDate());
#else #else
TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy (vers %s), see www.w3.org", tidyReleaseDate()); TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for HTML5 (experimental) %s", tidyReleaseDate());
#endif #endif
for ( node = head->content; node; node = node->next ) for ( node = head->content; node; node = node->next )
@ -1562,6 +1577,12 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
switch(dtmode) switch(dtmode)
{ {
case TidyDoctypeHtml5:
/* HTML5 */
TY_(RepairAttrValue)(doc, doctype, pub, NULL);
TY_(RepairAttrValue)(doc, doctype, sys, NULL);
lexer->versionEmitted = XH50;
break;
case TidyDoctypeStrict: case TidyDoctypeStrict:
/* XHTML 1.0 Strict */ /* XHTML 1.0 Strict */
TY_(RepairAttrValue)(doc, doctype, pub, GetFPIFromVers(X10S)); TY_(RepairAttrValue)(doc, doctype, pub, GetFPIFromVers(X10S));
@ -1580,7 +1601,11 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
TY_(RepairAttrValue)(doc, doctype, sys, ""); TY_(RepairAttrValue)(doc, doctype, sys, "");
break; break;
case TidyDoctypeAuto: case TidyDoctypeAuto:
if (lexer->versions & XH11 && lexer->doctype == XH11) if (lexer->doctype == VERS_UNKNOWN) {
lexer->versionEmitted = XH50;
return yes;
}
else if (lexer->versions & XH11 && lexer->doctype == XH11)
{ {
if (!TY_(GetAttrByName)(doctype, sys)) if (!TY_(GetAttrByName)(doctype, sys))
TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(XH11)); TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(XH11));
@ -1618,10 +1643,6 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(X10T)); TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(X10T));
lexer->versionEmitted = X10T; lexer->versionEmitted = X10T;
} }
else if (lexer->versions & XH50)
{
lexer->versionEmitted = XH50;
}
else else
{ {
if (doctype) if (doctype)
@ -1678,6 +1699,9 @@ Bool TY_(FixDocType)( TidyDocImpl* doc )
switch (dtmode) switch (dtmode)
{ {
case TidyDoctypeHtml5:
guessed = HT50;
break;
case TidyDoctypeStrict: case TidyDoctypeStrict:
guessed = H41S; guessed = H41S;
break; break;
@ -2010,6 +2034,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode );
Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode ) Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode )
{ {
Node *node;
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
if (lexer->pushed || lexer->itoken) if (lexer->pushed || lexer->itoken)
@ -2030,33 +2055,61 @@ Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode )
/* duplicate inlines in preference to pushed text nodes when appropriate */ /* duplicate inlines in preference to pushed text nodes when appropriate */
lexer->pushed = no; lexer->pushed = no;
if (lexer->token->type != TextNode if (lexer->token->type != TextNode
|| !(lexer->insert || lexer->inode)) || !(lexer->insert || lexer->inode)) {
return lexer->token; node = lexer->token;
return lexer->itoken = TY_(InsertedToken)( doc ); #if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning pushed token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
}
/* A pushed text node is pending but inline elements must be re-opened
   first: create the inserted inline token, remember it in lexer->itoken,
   and hand that token back.  The pushed text node stays in lexer->token
   and must not be the value returned here. */
lexer->itoken = TY_(InsertedToken)( doc );
node = lexer->itoken;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning inserted token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
} }
assert( !(lexer->pushed || lexer->itoken) ); assert( !(lexer->pushed || lexer->itoken) );
/* at start of block elements, unclosed inline /* at start of block elements, unclosed inline
elements are inserted into the token stream */ elements are inserted into the token stream */
if (lexer->insert || lexer->inode) if (lexer->insert || lexer->inode) {
return lexer->token = TY_(InsertedToken)( doc ); lexer->token = TY_(InsertedToken)( doc );
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning Inserted token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
}
if (mode == CdataContent) if (mode == CdataContent)
{ {
assert( lexer->parent != NULL ); assert( lexer->parent != NULL );
return GetCDATA(doc, lexer->parent); node = GetCDATA(doc, lexer->parent);
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning Cdatacontent token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
} }
return GetTokenFromStream( doc, mode ); return GetTokenFromStream( doc, mode );
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
/* Debug-only hook (MSVC, non-NDEBUG builds): logs the element name it is
   given through SPRTF.  The name is only read, never modified, so it is
   taken as a pointer to const. */
static void check_me(const char *name)
{
SPRTF("Have node %s\n", name);
}
#endif
static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
{ {
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
uint c, badcomment = 0; uint c, badcomment = 0;
Bool isempty = no; Bool isempty = no;
AttVal *attributes = NULL; AttVal *attributes = NULL;
Node *node;
/* Lexer->token must be set on return. Nullify it for safety. */ /* Lexer->token must be set on return. Nullify it for safety. */
lexer->token = NULL; lexer->token = NULL;
@ -2170,7 +2223,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 3); StoreOriginalTextInToken(doc, lexer->token, 3);
#endif #endif
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning text token len %d...\n", node->end - node->start );
#endif
return node;
} }
continue; /* no text so keep going */ continue; /* no text so keep going */
@ -2397,7 +2454,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 0); /* hmm... */ StoreOriginalTextInToken(doc, lexer->token, 0); /* hmm... */
#endif #endif
return lexer->token; /* the endtag token */ node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning endtag token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the endtag token */
case LEX_STARTTAG: /* first letter of tagname */ case LEX_STARTTAG: /* first letter of tagname */
c = TY_(ReadChar)(doc->docIn); c = TY_(ReadChar)(doc->docIn);
@ -2471,7 +2532,19 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 0); StoreOriginalTextInToken(doc, lexer->token, 0);
#endif #endif
return lexer->token; /* return start tag */ node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning starttag token '%s'...\n", node->element ? node->element : "<blank>");
if (node->element) {
//if (stricmp(node->element,"datalist") == 0) {
// check_me(node->element);
//} else
if (stricmp(node->element,"option") == 0) {
check_me(node->element);
}
}
#endif
return node; /* return start tag */
case LEX_COMMENT: /* seen <!-- so look for --> */ case LEX_COMMENT: /* seen <!-- so look for --> */
@ -2509,7 +2582,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
else else
TY_(UngetChar)(c, doc->docIn); TY_(UngetChar)(c, doc->docIn);
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning comment token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
} }
/* note position of first such error in the comment */ /* note position of first such error in the comment */
@ -2554,7 +2631,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
/* make a note of the version named by the 1st doctype */ /* make a note of the version named by the 1st doctype */
if (lexer->doctype == VERS_UNKNOWN && lexer->token && !cfgBool(doc, TidyXmlTags)) if (lexer->doctype == VERS_UNKNOWN && lexer->token && !cfgBool(doc, TidyXmlTags))
lexer->doctype = FindGivenVersion(doc, lexer->token); lexer->doctype = FindGivenVersion(doc, lexer->token);
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning doctype token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
case LEX_PROCINSTR: /* seen <? so look for '>' */ case LEX_PROCINSTR: /* seen <? so look for '>' */
/* check for PHP preprocessor instructions <?php ... ?> */ /* check for PHP preprocessor instructions <?php ... ?> */
@ -2636,7 +2717,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning procinstr token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
case LEX_ASP: /* seen <% so look for "%>" */ case LEX_ASP: /* seen <% so look for "%>" */
if (c != '%') if (c != '%')
@ -2657,7 +2742,14 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = AspToken(doc); lexer->token = AspToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning ASP token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the ASP token */
case LEX_JSTE: /* seen <# so look for "#>" */ case LEX_JSTE: /* seen <# so look for "#>" */
if (c != '#') if (c != '#')
@ -2678,7 +2770,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = JsteToken(doc); lexer->token = JsteToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning JSTE token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the JSTE token */
case LEX_PHP: /* seen "<?php" so look for "?>" */ case LEX_PHP: /* seen "<?php" so look for "?>" */
if (c != '?') if (c != '?')
@ -2698,7 +2796,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = PhpToken(doc); lexer->token = PhpToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning PHP token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the PHP token */
case LEX_XMLDECL: /* seen "<?xml" so look for "?>" */ case LEX_XMLDECL: /* seen "<?xml" so look for "?>" */
@ -2728,7 +2831,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->waswhite = no; lexer->waswhite = no;
lexer->token = XmlDeclToken(doc); lexer->token = XmlDeclToken(doc);
lexer->token->attributes = attributes; lexer->token->attributes = attributes;
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning xml token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the xml token */
} }
av = TY_(NewAttribute)(doc); av = TY_(NewAttribute)(doc);
@ -2756,7 +2863,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->waswhite = no; lexer->waswhite = no;
lexer->token = XmlDeclToken(doc); lexer->token = XmlDeclToken(doc);
lexer->token->attributes = attributes; lexer->token->attributes = attributes;
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning XML token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the XML token */
case LEX_SECTION: /* seen "<![" so look for "]>" */ case LEX_SECTION: /* seen "<![" so look for "]>" */
if (c == '[') if (c == '[')
@ -2787,7 +2898,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = SectionToken(doc); lexer->token = SectionToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning SECTION token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the SECTION token */
case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */ case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */
if (c != ']') if (c != ']')
@ -2817,7 +2933,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = CDATAToken(doc); lexer->token = CDATAToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning CDATA token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the CDATA token */
} }
} }
@ -2838,7 +2959,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 0); /* ? */ StoreOriginalTextInToken(doc, lexer->token, 0); /* ? */
#endif #endif
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning textstring token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the textstring token */
} }
} }
else if (lexer->state == LEX_COMMENT) /* comment */ else if (lexer->state == LEX_COMMENT) /* comment */
@ -2850,9 +2975,17 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = CommentToken(doc); lexer->token = CommentToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning COMMENT token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the COMMENT token */
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning NULL...\n");
#endif
return NULL; return NULL;
} }

View file

@ -6,9 +6,6 @@
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/
/*
Given an input source, it returns a sequence of tokens. Given an input source, it returns a sequence of tokens.
GetToken(source) gets the next token GetToken(source) gets the next token
@ -189,7 +186,7 @@ typedef enum
/* special flag */ /* special flag */
#define VERS_XML 65536u #define VERS_XML 65536u
/* "HTML5" */ /* HTML5 */
#define HT50 131072u #define HT50 131072u
#define XH50 262144u #define XH50 262144u
@ -202,6 +199,8 @@ typedef enum
#define VERS_FRAMESET (H40F|H41F|X10F) #define VERS_FRAMESET (H40F|H41F|X10F)
#define VERS_XHTML11 (XH11) #define VERS_XHTML11 (XH11)
#define VERS_BASIC (XB10) #define VERS_BASIC (XB10)
/* HTML5 */
#define VERS_HTML5 (HT50|XH50)
/* meta symbols */ /* meta symbols */
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET) #define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET)
@ -411,6 +410,7 @@ void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
Bool TY_(IsWhite)(uint c); Bool TY_(IsWhite)(uint c);
Bool TY_(IsDigit)(uint c); Bool TY_(IsDigit)(uint c);
Bool TY_(IsLetter)(uint c); Bool TY_(IsLetter)(uint c);
Bool TY_(IsHTMLSpace)(uint c);
Bool TY_(IsNewline)(uint c); Bool TY_(IsNewline)(uint c);
Bool TY_(IsNamechar)(uint c); Bool TY_(IsNamechar)(uint c);
Bool TY_(IsXMLLetter)(uint c); Bool TY_(IsXMLLetter)(uint c);

View file

@ -101,6 +101,8 @@ static struct _msgfmt
{ NESTED_QUOTATION, "nested q elements, possible typo." }, /* Warning */ { NESTED_QUOTATION, "nested q elements, possible typo." }, /* Warning */
{ OBSOLETE_ELEMENT, "replacing obsolete element %s by %s" }, /* Warning */ { OBSOLETE_ELEMENT, "replacing obsolete element %s by %s" }, /* Warning */
{ COERCE_TO_ENDTAG_WARN, "<%s> is probably intended as </%s>" }, /* Warning */ { COERCE_TO_ENDTAG_WARN, "<%s> is probably intended as </%s>" }, /* Warning */
/* HTML5 */
{ REMOVED_HTML5, "%s element removed from HTML5" }, /* Warning */
/* ReportNotice */ /* ReportNotice */
{ TRIM_EMPTY_ELEMENT, "trimming empty %s" }, /* Notice */ { TRIM_EMPTY_ELEMENT, "trimming empty %s" }, /* Notice */
@ -320,7 +322,7 @@ static const TidyOptionId TidyIndentContentLinks[] =
static const TidyOptionId TidyIndentSpacesLinks[] = static const TidyOptionId TidyIndentSpacesLinks[] =
{ TidyIndentContent, TidyUnknownOption }; { TidyIndentContent, TidyUnknownOption };
static const TidyOptionId TidyWrapAttValsLinks[] = static const TidyOptionId TidyWrapAttValsLinks[] =
{ TidyWrapScriptlets, TidyUnknownOption }; { TidyWrapScriptlets, TidyLiteralAttribs, TidyUnknownOption };
static const TidyOptionId TidyWrapScriptletsLinks[] = static const TidyOptionId TidyWrapScriptletsLinks[] =
{ TidyWrapAttVals, TidyUnknownOption }; { TidyWrapAttVals, TidyUnknownOption };
static const TidyOptionId TidyCharEncodingLinks[] = static const TidyOptionId TidyCharEncodingLinks[] =
@ -353,6 +355,8 @@ static const TidyOptionId TidyDropFontTagsLinks[] =
{ TidyMakeClean, TidyUnknownOption }; { TidyMakeClean, TidyUnknownOption };
static const TidyOptionId TidyMakeCleanTagsLinks[] = static const TidyOptionId TidyMakeCleanTagsLinks[] =
{ TidyDropFontTags, TidyUnknownOption }; { TidyDropFontTags, TidyUnknownOption };
static const TidyOptionId TidyGDocCleanLinks[] =
{ TidyMakeClean, TidyUnknownOption };
/* Documentation of options */ /* Documentation of options */
static const TidyOptionDoc option_docs[] = static const TidyOptionDoc option_docs[] =
@ -399,14 +403,24 @@ static const TidyOptionDoc option_docs[] =
"on the HTML saved by Microsoft Office products. " "on the HTML saved by Microsoft Office products. "
, TidyMakeCleanTagsLinks , TidyMakeCleanTagsLinks
}, },
{TidyGDocClean,
"This option specifies if Tidy "
"should enable specific behavior for cleaning up HTML exported from "
"Google Docs. "
, TidyMakeCleanTagsLinks
},
{TidyDoctype, {TidyDoctype,
"This option specifies the DOCTYPE declaration generated by Tidy. If set " "This option specifies the DOCTYPE declaration generated by Tidy.<br />"
"to \"omit\" the output won't contain a DOCTYPE declaration. If set to " "If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />"
"\"auto\" (the default) Tidy will use an educated guess based upon the " "If set to \"html5\" the DOCTYPE is set to \"&lt;!DOCTYPE html>\".<br />"
"contents of the document. If set to \"strict\", Tidy will set the DOCTYPE " "If set to \"auto\" (the default) Tidy will use an educated guess based "
"to the strict DTD. If set to \"loose\", the DOCTYPE is set to the loose " "upon the contents of the document.<br />"
"(transitional) DTD. Alternatively, you can supply a string for the formal " "If set to \"strict\", Tidy will set the DOCTYPE to the HTML4 or XHTML1 "
"public identifier (FPI).<br />" "strict DTD.<br />"
"If set to \"loose\", the DOCTYPE is set to the HTML4 or XHTML1 loose "
"(transitional) DTD. <br />"
"Alternatively, you can supply a string for the formal public identifier "
"(FPI).<br />"
"<br />" "<br />"
"For example: <br />" "For example: <br />"
"doctype: \"-//ACME//DTD HTML 3.14159//EN\"<br />" "doctype: \"-//ACME//DTD HTML 3.14159//EN\"<br />"
@ -419,6 +433,9 @@ static const TidyOptionDoc option_docs[] =
"<code>--numeric-entities yes</code>. This option does not offer a " "<code>--numeric-entities yes</code>. This option does not offer a "
"validation of the document conformance. " "validation of the document conformance. "
}, },
{TidyDropEmptyElems,
"This option specifies if Tidy should discard empty elements. "
},
{TidyDropEmptyParas, {TidyDropEmptyParas,
"This option specifies if Tidy should discard empty paragraphs. " "This option specifies if Tidy should discard empty paragraphs. "
}, },
@ -460,10 +477,22 @@ static const TidyOptionDoc option_docs[] =
{TidyHideComments, {TidyHideComments,
"This option specifies if Tidy should print out comments. " "This option specifies if Tidy should print out comments. "
}, },
{TidyCoerceEndTags,
"This option specifies if Tidy should coerce a start tag into an end tag "
"in cases where it looks like an end tag was probably intended; "
"for example, given &lt;span&gt;foo &lt;b&gt;bar&lt;b&gt; baz&lt;/span&gt;, "
"Tidy will output &lt;span&gt;foo &lt;b&gt;bar&lt;/b&gt; baz&lt;/span&gt;. "
},
{TidyOmitOptionalTags,
"This option specifies if Tidy should omit optional start tags and end tags "
"when generating output. Setting this option causes all tags for the "
"html, head, and body elements to be omitted from output, as well as such "
"end tags as &lt;/p&gt;, &lt;/li&gt;, &lt;/dt&gt;, &lt;/dd&gt;, "
"&lt;/option&gt;, &lt;/tr&gt;, &lt;/td&gt;, and &lt;/th&gt;. "
"This option is ignored for XML output. "
},
{TidyHideEndTags, {TidyHideEndTags,
"This option specifies if Tidy should omit optional end-tags when " "This option is an alias for omit-optional-tags. "
"generating the pretty printed markup. This option is ignored if you are "
"outputting to XML. "
}, },
{TidyIndentCdata, {TidyIndentCdata,
"This option specifies if Tidy should indent &lt;![CDATA[]]&gt; sections. " "This option specifies if Tidy should indent &lt;![CDATA[]]&gt; sections. "
@ -494,6 +523,12 @@ static const TidyOptionDoc option_docs[] =
"that takes a list of predefined values to lower case. This is required " "that takes a list of predefined values to lower case. This is required "
"for XHTML documents. " "for XHTML documents. "
}, },
{TidyMergeEmphasis,
"This option specifies if Tidy should merge nested &lt;b&gt; and &lt;i&gt; "
"elements; for example, for the case "
"&lt;b class=\"rtop-2\"&gt;foo &lt;b class=\"r2-2\"&gt;bar&lt;/b&gt; baz&lt;/b&gt;, "
"Tidy will output &lt;b class=\"rtop-2\"&gt;foo bar baz&lt;/b&gt;. "
},
{TidyMergeDivs, {TidyMergeDivs,
"Can be used to modify behavior of -c (--clean yes) option. " "Can be used to modify behavior of -c (--clean yes) option. "
"This option specifies if Tidy should merge nested &lt;div&gt; such as " "This option specifies if Tidy should merge nested &lt;div&gt; such as "
@ -644,6 +679,9 @@ static const TidyOptionDoc option_docs[] =
"This option specifies the number Tidy uses to determine if further errors " "This option specifies the number Tidy uses to determine if further errors "
"should be shown. If set to 0, then no errors are shown. " "should be shown. If set to 0, then no errors are shown. "
}, },
{TidyShowInfo,
"This option specifies if Tidy should display info-level messages. "
},
{TidyShowWarnings, {TidyShowWarnings,
"This option specifies if Tidy should suppress warnings. This can be " "This option specifies if Tidy should suppress warnings. This can be "
"useful when a few errors are hidden in a flurry of warnings. " "useful when a few errors are hidden in a flurry of warnings. "
@ -670,8 +708,14 @@ static const TidyOptionDoc option_docs[] =
,TidyIndentSpacesLinks ,TidyIndentSpacesLinks
}, },
{TidyLiteralAttribs, {TidyLiteralAttribs,
"This option specifies if Tidy should ensure that whitespace characters " "This option specifies how Tidy deals with whitespace characters within "
"within attribute values are passed through unchanged. " "attribute values. If the value is \"no\" (the default), Tidy \"munges\" "
"or \"normalizes\" attribute values by replacing any newline or tab "
"character with a single space character, and further by replacing "
"any sequences of multiple whitespace characters with a single space. "
"To force tidy to preserve the original, literal values of all attributes, "
"and ensure that whitespace characters within attribute values are passed "
"through unchanged, set this option to \"yes\". "
}, },
{TidyShowMarkup, {TidyShowMarkup,
"This option specifies if Tidy should generate a pretty printed version " "This option specifies if Tidy should generate a pretty printed version "
@ -706,9 +750,18 @@ static const TidyOptionDoc option_docs[] =
"pseudo elements, which look like: &lt;% ... %&gt;. " "pseudo elements, which look like: &lt;% ... %&gt;. "
}, },
{TidyWrapAttVals, {TidyWrapAttVals,
"This option specifies if Tidy should line wrap attribute values, for " "This option specifies if Tidy should line-wrap attribute values, for "
"easier editing. This option can be set independently of " "easier editing. Line wrapping means that if the value of an attribute "
"wrap-script-literals. " "causes a line to exceed the width specified by the \"wrap\" option, "
"tidy will add one or more line breaks to the value, causing it to "
"wrapped into multiple lines. Note that this option can be set "
"independently of wrap-script-literals. Also note that by default, Tidy "
"\"munges\" or \"normalizes\" attribute values by replacing any newline "
"or tab character with a single space character, and further by replacing "
"any sequences of multiple whitespace characters with a single space. "
"To force Tidy to preserve the original, literal values of all attributes, "
"and ensure that whitespace characters within attribute values are passed "
"through unchanged, set the literal-attributes option to \"yes\". "
,TidyWrapAttValsLinks ,TidyWrapAttValsLinks
}, },
{TidyWrapJste, {TidyWrapJste,
@ -1047,6 +1100,7 @@ __attribute__((format(printf, 2, 3)))
void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... ) void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... )
{ {
va_list args; va_list args;
if (level == TidyInfo && !cfgBool(doc, TidyShowInfo)) return;
va_start( args, msg ); va_start( args, msg );
messagePos( doc, level, 0, 0, msg, args ); messagePos( doc, level, 0, 0, msg, args );
va_end( args ); va_end( args );
@ -1367,14 +1421,14 @@ void TY_(ReportAccessWarning)( TidyDocImpl* doc, Node* node, uint code )
{ {
ctmbstr fmt = GetFormatFromCode(code); ctmbstr fmt = GetFormatFromCode(code);
doc->badAccess |= BA_WAI; doc->badAccess |= BA_WAI;
messageNode( doc, TidyAccess, node, fmt ); messageNode( doc, TidyAccess, node, "%s", fmt );
} }
void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code ) void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code )
{ {
ctmbstr fmt = GetFormatFromCode(code); ctmbstr fmt = GetFormatFromCode(code);
doc->badAccess |= BA_WAI; doc->badAccess |= BA_WAI;
messageNode( doc, TidyAccess, node, fmt ); messageNode( doc, TidyAccess, node, "%s", fmt );
} }
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */ #endif /* SUPPORT_ACCESSIBILITY_CHECKS */
@ -1393,7 +1447,7 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
switch (code) switch (code)
{ {
case NESTED_QUOTATION: case NESTED_QUOTATION:
messageNode(doc, TidyWarning, rpt, fmt); messageNode(doc, TidyWarning, rpt, "%s", fmt);
break; break;
case OBSOLETE_ELEMENT: case OBSOLETE_ELEMENT:
@ -1401,6 +1455,7 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc); messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc);
break; break;
case REMOVED_HTML5:
case NESTED_EMPHASIS: case NESTED_EMPHASIS:
messageNode(doc, TidyWarning, rpt, fmt, nodedesc); messageNode(doc, TidyWarning, rpt, fmt, nodedesc);
break; break;
@ -1474,7 +1529,7 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
case INCONSISTENT_NAMESPACE: case INCONSISTENT_NAMESPACE:
case DOCTYPE_AFTER_TAGS: case DOCTYPE_AFTER_TAGS:
case DTYPE_NOT_UPPER_CASE: case DTYPE_NOT_UPPER_CASE:
messageNode(doc, TidyWarning, rpt, fmt); messageNode(doc, TidyWarning, rpt, "%s", fmt);
break; break;
case COERCE_TO_ENDTAG: case COERCE_TO_ENDTAG:
@ -1493,7 +1548,7 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
case ENCODING_IO_CONFLICT: case ENCODING_IO_CONFLICT:
case MISSING_DOCTYPE: case MISSING_DOCTYPE:
case SPACE_PRECEDING_XMLDECL: case SPACE_PRECEDING_XMLDECL:
messageNode(doc, TidyWarning, node, fmt); messageNode(doc, TidyWarning, node, "%s", fmt);
break; break;
case TRIM_EMPTY_ELEMENT: case TRIM_EMPTY_ELEMENT:
@ -1542,7 +1597,7 @@ void TY_(ReportFatal)( TidyDocImpl* doc, Node *element, Node *node, uint code)
{ {
case SUSPECTED_MISSING_QUOTE: case SUSPECTED_MISSING_QUOTE:
case DUPLICATE_FRAMESET: case DUPLICATE_FRAMESET:
messageNode(doc, TidyError, rpt, fmt); messageNode(doc, TidyError, rpt, "%s", fmt);
break; break;
case UNKNOWN_ELEMENT: case UNKNOWN_ELEMENT:
@ -1775,11 +1830,14 @@ void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc )
void TY_(GeneralInfo)( TidyDocImpl* doc ) void TY_(GeneralInfo)( TidyDocImpl* doc )
{ {
tidy_out(doc, "To learn more about HTML Tidy see http://tidy.sourceforge.net\n"); if (!cfgBool(doc, TidyShowInfo)) return;
tidy_out(doc, "Please fill bug reports and queries using the \"tracker\" on the Tidy web site.\n"); tidy_out(doc, "About this fork of Tidy: http://w3c.github.com/tidy-html5/\n");
tidy_out(doc, "Additionally, questions can be sent to html-tidy@w3.org\n"); tidy_out(doc, "Bug reports and comments: https://github.com/w3c/tidy-html5/issues/\n");
tidy_out(doc, "HTML and CSS specifications are available from http://www.w3.org/\n"); tidy_out(doc, "Or send questions and comments to html-tidy@w3.org\n");
tidy_out(doc, "Lobby your company to join W3C, see http://www.w3.org/Consortium\n"); tidy_out(doc, "Latest HTML specification: http://dev.w3.org/html5/spec-author-view/\n");
tidy_out(doc, "HTML language reference: http://dev.w3.org/html5/markup/\n");
tidy_out(doc, "Validate your HTML5 documents: http://validator.w3.org/nu/\n");
tidy_out(doc, "Lobby your company to join the W3C: http://www.w3.org/Consortium\n");
} }
#if SUPPORT_ACCESSIBILITY_CHECKS #if SUPPORT_ACCESSIBILITY_CHECKS

View file

@ -154,7 +154,9 @@ void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
#define MISSING_ATTRIBUTE 86 #define MISSING_ATTRIBUTE 86
#define WHITE_IN_URI 87 #define WHITE_IN_URI 87
#define PREVIOUS_LOCATION 88 /* last */ #define REMOVED_HTML5 88 /* this element removed from HTML5 */
#define PREVIOUS_LOCATION 89 /* last */
/* character encoding errors */ /* character encoding errors */

View file

@ -12,6 +12,13 @@
#include "clean.h" #include "clean.h"
#include "tags.h" #include "tags.h"
#include "tmbstr.h" #include "tmbstr.h"
#ifdef _MSC_VER
#include "sprtf.h"
#endif
#ifndef SPRTF
#define SPRTF printf
#endif
#ifdef AUTO_INPUT_ENCODING #ifdef AUTO_INPUT_ENCODING
#include "charsets.h" #include "charsets.h"
@ -234,6 +241,9 @@ void TY_(InsertNodeAfterElement)(Node *element, Node *node)
static Bool CanPrune( TidyDocImpl* doc, Node *element ) static Bool CanPrune( TidyDocImpl* doc, Node *element )
{ {
if ( !cfgBool(doc, TidyDropEmptyElems) )
return no;
if ( TY_(nodeIsText)(element) ) if ( TY_(nodeIsText)(element) )
return yes; return yes;
@ -278,6 +288,13 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
if (nodeIsTEXTAREA(element)) if (nodeIsTEXTAREA(element))
return no; return no;
/* fix for ISSUE #7 https://github.com/w3c/tidy-html5/issues/7 */
if (nodeIsCANVAS(element))
return no;
if (nodeIsPROGRESS(element))
return no;
if ( attrGetID(element) || attrGetNAME(element) ) if ( attrGetID(element) || attrGetNAME(element) )
return no; return no;
@ -296,6 +313,10 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
if (nodeIsCOLGROUP(element)) if (nodeIsCOLGROUP(element))
return no; return no;
/* HTML5 - do NOT drop empty option if it has attributes */
if ( nodeIsOPTION(element) && element->attributes != NULL )
return no;
return yes; return yes;
} }
@ -811,13 +832,25 @@ static void AddClassNoIndent( TidyDocImpl* doc, Node *node )
*/ */
void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
{ {
#if !defined(NDEBUG) && defined(_MSC_VER)
static int in_parse_block = 0;
#endif
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
Node *node; Node *node;
Bool checkstack = yes; Bool checkstack = yes;
uint istackbase = 0; uint istackbase = 0;
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block++;
SPRTF("Entering ParseBlock %d...\n",in_parse_block);
#endif
if ( element->tag->model & CM_EMPTY ) if ( element->tag->model & CM_EMPTY ) {
#if !defined(NDEBUG) && defined(_MSC_VER)
/* debug trace: undo the entry count before the early return that follows */
in_parse_block--;
SPRTF("Exit ParseBlock 1 %d...\n",in_parse_block);
#endif
return; return;
}
if ( nodeIsFORM(element) && if ( nodeIsFORM(element) &&
DescendantOf(element, TidyTag_FORM) ) DescendantOf(element, TidyTag_FORM) )
@ -860,6 +893,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
element->closed = yes; element->closed = yes;
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
#endif
return; return;
} }
@ -951,6 +988,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
{ {
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
#endif
return; return;
} }
} }
@ -1111,6 +1152,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
{ {
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 3 %d...\n",in_parse_block);
#endif
return; return;
} }
} }
@ -1127,6 +1172,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
lexer->istackbase = istackbase; lexer->istackbase = istackbase;
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 4 %d...\n",in_parse_block);
#endif
return; return;
} }
} }
@ -1177,6 +1226,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
element->parent->tag->parser == TY_(ParseList) ) element->parent->tag->parser == TY_(ParseList) )
{ {
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 5 %d...\n",in_parse_block);
#endif
return; return;
} }
@ -1188,6 +1241,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
if ( nodeIsDL(element->parent) ) if ( nodeIsDL(element->parent) )
{ {
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 6 %d...\n",in_parse_block);
#endif
return; return;
} }
@ -1198,8 +1255,13 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
/* http://tidy.sf.net/issue/1316307 */ /* http://tidy.sf.net/issue/1316307 */
/* In exiled mode, return so table processing can /* In exiled mode, return so table processing can
continue. */ continue. */
if (lexer->exiled) if (lexer->exiled) {
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 7 %d...\n",in_parse_block);
#endif
return; return;
}
node = TY_(InferredTag)(doc, TidyTag_TABLE); node = TY_(InferredTag)(doc, TidyTag_TABLE);
} }
else if ( TY_(nodeHasCM)(element, CM_OBJECT) ) else if ( TY_(nodeHasCM)(element, CM_OBJECT) )
@ -1209,12 +1271,20 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
TY_(PopInline)( doc, NULL ); TY_(PopInline)( doc, NULL );
lexer->istackbase = istackbase; lexer->istackbase = istackbase;
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 8 %d...\n",in_parse_block);
#endif
return; return;
} }
else else
{ {
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 9 %d...\n",in_parse_block);
#endif
return; return;
} }
} }
@ -1278,15 +1348,31 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
} }
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 10 %d...\n",in_parse_block);
#endif
} }
void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
{ {
#if !defined(NDEBUG) && defined(_MSC_VER)
static int in_parse_inline = 0;
#endif
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
Node *node, *parent; Node *node, *parent;
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline++;
SPRTF("Entering ParseInline %d...\n",in_parse_inline);
#endif
if (element->tag->model & CM_EMPTY) if (element->tag->model & CM_EMPTY) {
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 1 %d...\n",in_parse_inline);
#endif
return; return;
}
/* /*
ParseInline is used for some block level elements like H1 to H6 ParseInline is used for some block level elements like H1 to H6
@ -1363,6 +1449,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
element->closed = yes; element->closed = yes;
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 2 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1384,6 +1474,7 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
&& !nodeIsSUP(node) && !nodeIsSUP(node)
&& !nodeIsQ(node) && !nodeIsQ(node)
&& !nodeIsSPAN(node) && !nodeIsSPAN(node)
&& cfgBool(doc, TidyCoerceEndTags)
) )
{ {
/* proceeds only if "node" does not have any attribute and /* proceeds only if "node" does not have any attribute and
@ -1442,7 +1533,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 3 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1529,6 +1623,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
TY_(InlineDup1)( doc, NULL, element ); /* dupe the <i>, after </b> */ TY_(InlineDup1)( doc, NULL, element ); /* dupe the <i>, after </b> */
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 4 %d...\n",in_parse_inline);
#endif
return; /* close <i>, but will re-open it, after </b> */ return; /* close <i>, but will re-open it, after </b> */
} }
} }
@ -1549,7 +1647,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 5 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1563,6 +1664,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
{ {
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 6 %d...\n",in_parse_inline);
#endif
return; return;
} }
} }
@ -1585,6 +1690,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 7 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1601,7 +1710,8 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
/* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */ /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */
/* other fixes by Dave Raggett */ /* other fixes by Dave Raggett */
/* if (node->attributes == NULL) */ /* if (node->attributes == NULL) */
if (node->type != EndTag && node->attributes == NULL) if (node->type != EndTag && node->attributes == NULL
&& cfgBool(doc, TidyCoerceEndTags) )
{ {
node->type = EndTag; node->type = EndTag;
TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG); TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG);
@ -1617,6 +1727,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 8 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1739,6 +1853,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 9 %d...\n",in_parse_inline);
#endif
return; return;
} }
} }
@ -1754,7 +1872,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
TY_(FreeNode)( doc, node); TY_(FreeNode)( doc, node);
continue; continue;
} }
/* HTML5 */
if (nodeIsDATALIST(element)) {
TY_(ConstrainVersion)( doc, ~VERS_HTML5 );
} else
if (!(element->tag->model & CM_OPT)) if (!(element->tag->model & CM_OPT))
TY_(ReportError)(doc, element, node, MISSING_ENDTAG_BEFORE); TY_(ReportError)(doc, element, node, MISSING_ENDTAG_BEFORE);
@ -1776,6 +1897,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
{ {
TY_(DiscardElement)( doc, element ); TY_(DiscardElement)( doc, element );
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 10 %d...\n",in_parse_inline);
#endif
return; return;
} }
} }
@ -1785,6 +1910,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 11 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1812,6 +1941,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(element->tag->model & CM_OPT)) if (!(element->tag->model & CM_OPT))
TY_(ReportError)(doc, element, node, MISSING_ENDTAG_FOR); TY_(ReportError)(doc, element, node, MISSING_ENDTAG_FOR);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 12 %d...\n",in_parse_inline);
#endif
} }
void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode) void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode)
@ -1824,7 +1957,7 @@ void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode)
{ {
if ( !(node->type == EndTag && node->tag == element->tag) ) if ( !(node->type == EndTag && node->tag == element->tag) )
{ {
TY_(ReportError)(doc, element, node, ELEMENT_NOT_EMPTY); /* TY_(ReportError)(doc, element, node, ELEMENT_NOT_EMPTY); */
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
} }
else else
@ -2895,10 +3028,17 @@ void TY_(ParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode)) void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
{ {
#if !defined(NDEBUG) && defined(_MSC_VER)
static int in_parse_select = 0;
#endif
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
Node *node; Node *node;
lexer->insert = NULL; /* defer implicit inline start tags */ lexer->insert = NULL; /* defer implicit inline start tags */
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_select++;
SPRTF("Entering ParseSelect %d...\n",in_parse_select);
#endif
while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
{ {
@ -2907,6 +3047,10 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
TY_(FreeNode)( doc, node); TY_(FreeNode)( doc, node);
field->closed = yes; field->closed = yes;
TrimSpaces(doc, field); TrimSpaces(doc, field);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_select--;
SPRTF("Exit ParseSelect 1 %d...\n",in_parse_select);
#endif
return; return;
} }
@ -2917,6 +3061,7 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
if ( node->type == StartTag && if ( node->type == StartTag &&
( nodeIsOPTION(node) || ( nodeIsOPTION(node) ||
nodeIsOPTGROUP(node) || nodeIsOPTGROUP(node) ||
nodeIsDATALIST(node) ||
nodeIsSCRIPT(node)) nodeIsSCRIPT(node))
) )
{ {
@ -2931,8 +3076,72 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
} }
TY_(ReportError)(doc, field, node, MISSING_ENDTAG_FOR); TY_(ReportError)(doc, field, node, MISSING_ENDTAG_FOR);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_select--;
SPRTF("Exit ParseSelect 2 %d...\n",in_parse_select);
#endif
} }
/* HTML5
 *
 * Parse the children of a <datalist> element.
 *
 *   doc   - document being parsed; tokens are pulled from it.
 *   field - the datalist node whose content is being collected.
 *   mode  - unused (tokens are always fetched with IgnoreWhitespace).
 *
 * Tokens are consumed until the end tag matching `field` is seen, at
 * which point the element is marked closed and trimmed.  Start tags for
 * option, optgroup, datalist and script are appended to `field` and
 * parsed; any other token not taken by InsertMisc is reported as
 * DISCARDING_UNEXPECTED and freed.  If the token stream runs out first,
 * MISSING_ENDTAG_FOR is reported.
 */
void TY_(ParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
{
#if !defined(NDEBUG) && defined(_MSC_VER)
    /* debug-only counter: bumped on entry, dropped on every exit */
    static int in_parse_datalist = 0;
#endif
    Node *token;

    /* defer implicit inline start tags */
    doc->lexer->insert = NULL;

#if !defined(NDEBUG) && defined(_MSC_VER)
    in_parse_datalist++;
    SPRTF("Entering ParseDatalist %d...\n",in_parse_datalist);
#endif

    for (;;)
    {
        token = TY_(GetToken)(doc, IgnoreWhitespace);
        if (token == NULL)
            break;

        /* matching end tag: close the element and leave */
        if (token->type == EndTag && token->tag == field->tag)
        {
            TY_(FreeNode)( doc, token);
            field->closed = yes;
            TrimSpaces(doc, field);
#if !defined(NDEBUG) && defined(_MSC_VER)
            in_parse_datalist--;
            SPRTF("Exit ParseDatalist 1 %d...\n",in_parse_datalist);
#endif
            return;
        }

        /* deal with comments etc. */
        if (InsertMisc(field, token))
            continue;

        if ( token->type == StartTag &&
             ( nodeIsOPTION(token)   ||
               nodeIsOPTGROUP(token) ||
               nodeIsDATALIST(token) ||
               nodeIsSCRIPT(token) ) )
        {
            /* permitted child: attach it, then parse its own content */
            TY_(InsertNodeAtEnd)(field, token);
            ParseTag(doc, token, IgnoreWhitespace);
        }
        else
        {
            /* discard unexpected tags */
            TY_(ReportError)(doc, field, token, DISCARDING_UNEXPECTED);
            TY_(FreeNode)( doc, token);
        }
    }

    /* ran out of tokens (token is NULL here) without seeing the end tag */
    TY_(ReportError)(doc, field, token, MISSING_ENDTAG_FOR);
#if !defined(NDEBUG) && defined(_MSC_VER)
    in_parse_datalist--;
    SPRTF("Exit ParseDatalist 2 %d...\n",in_parse_datalist);
#endif
}
void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode) void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
{ {
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
@ -3006,7 +3215,8 @@ void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode
Node *node; Node *node;
while ((node = TY_(GetToken)(doc, MixedContent)) != NULL) while ((node = TY_(GetToken)(doc, MixedContent)) != NULL)
{ {
if (node->tag == title->tag && node->type == StartTag) if (node->tag == title->tag && node->type == StartTag
&& cfgBool(doc, TidyCoerceEndTags) )
{ {
TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG); TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG);
node->type = EndTag; node->type = EndTag;
@ -3129,6 +3339,9 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
int HasTitle = 0; int HasTitle = 0;
int HasBase = 0; int HasBase = 0;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Enter ParseHead...\n");
#endif
while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
{ {
if (node->tag == head->tag && node->type == EndTag) if (node->tag == head->tag && node->type == EndTag)
@ -3214,10 +3427,6 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
head ? head ?
TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS); TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
} }
else if ( nodeIsNOSCRIPT(node) )
{
TY_(ReportError)(doc, head, node, TAG_NOT_ALLOWED_IN);
}
#ifdef AUTO_INPUT_ENCODING #ifdef AUTO_INPUT_ENCODING
else if (nodeIsMETA(node)) else if (nodeIsMETA(node))
@ -3271,6 +3480,9 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
TY_(ReportError)(doc, head, node, DISCARDING_UNEXPECTED); TY_(ReportError)(doc, head, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)( doc, node); TY_(FreeNode)( doc, node);
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Exit ParseHead 1...\n");
#endif
} }
void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode) void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
@ -3283,6 +3495,9 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
checkstack = yes; checkstack = yes;
TY_(BumpObject)( doc, body->parent ); TY_(BumpObject)( doc, body->parent );
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Enter ParseBody...\n");
#endif
while ((node = TY_(GetToken)(doc, mode)) != NULL) while ((node = TY_(GetToken)(doc, mode)) != NULL)
{ {
@ -3510,7 +3725,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
if (TY_(nodeIsElement)(node)) if (TY_(nodeIsElement)(node))
{ {
if ( TY_(nodeHasCM)(node, CM_INLINE) && !TY_(nodeHasCM)(node, CM_MIXED) ) if ( TY_(nodeHasCM)(node, CM_INLINE) )
{ {
/* HTML4 strict doesn't allow inline content here */ /* HTML4 strict doesn't allow inline content here */
/* but HTML2 does allow img elements as children of body */ /* but HTML2 does allow img elements as children of body */
@ -3547,6 +3762,9 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
TY_(ReportError)(doc, body, node, DISCARDING_UNEXPECTED); TY_(ReportError)(doc, body, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)( doc, node); TY_(FreeNode)( doc, node);
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Exit ParseBody 1...\n");
#endif
} }
void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode) void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
@ -3735,6 +3953,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
Node *frameset = NULL; Node *frameset = NULL;
Node *noframes = NULL; Node *noframes = NULL;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Entering ParseHTML...\n");
#endif
TY_(SetOptionBool)( doc, TidyXmlTags, no ); TY_(SetOptionBool)( doc, TidyXmlTags, no );
for (;;) for (;;)
@ -3790,7 +4011,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
TY_(InsertNodeAtEnd)(html, node); TY_(InsertNodeAtEnd)(html, node);
TY_(ParseBody)(doc, node, mode); TY_(ParseBody)(doc, node, mode);
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Exit ParseHTML 1...\n");
#endif
return; return;
} }
@ -3956,6 +4179,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
TY_(InsertNodeAtEnd)(html, node); TY_(InsertNodeAtEnd)(html, node);
ParseTag(doc, node, mode); ParseTag(doc, node, mode);
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Exit ParseHTML 2...\n");
#endif
} }
static Bool nodeCMIsOnlyInline( Node* node ) static Bool nodeCMIsOnlyInline( Node* node )
@ -4048,7 +4274,9 @@ static void ReplaceObsoleteElements(TidyDocImpl* doc, Node* node)
{ {
next = node->next; next = node->next;
if (nodeIsDIR(node) || nodeIsMENU(node)) /* if (nodeIsDIR(node) || nodeIsMENU(node)) */
/* HTML5 - <menu ... > is no longer obsolete */
if (nodeIsDIR(node))
TY_(CoerceNode)(doc, node, TidyTag_UL, yes, yes); TY_(CoerceNode)(doc, node, TidyTag_UL, yes, yes);
if (nodeIsXMP(node) || nodeIsLISTING(node) || if (nodeIsXMP(node) || nodeIsLISTING(node) ||

View file

@ -1152,7 +1152,7 @@ static void PPrintAttribute( TidyDocImpl* doc, uint indent,
{ {
if ( TY_(IsScript)(doc, name) ) if ( TY_(IsScript)(doc, name) )
wrappable = cfgBool( doc, TidyWrapScriptlets ); wrappable = cfgBool( doc, TidyWrapScriptlets );
else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr)) && wrapAttrs ) else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr) || attrIsTITLE(attr)) && wrapAttrs )
wrappable = yes; wrappable = yes;
} }
@ -2083,7 +2083,8 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
{ {
Bool indcont = ( cfgAutoBool(doc, TidyIndentContent) != TidyNoState ); Bool indcont = ( cfgAutoBool(doc, TidyIndentContent) != TidyNoState );
Bool indsmart = ( cfgAutoBool(doc, TidyIndentContent) == TidyAutoState ); Bool indsmart = ( cfgAutoBool(doc, TidyIndentContent) == TidyAutoState );
Bool hideend = cfgBool( doc, TidyHideEndTags ); Bool hideend = cfgBool( doc, TidyHideEndTags ) ||
cfgBool( doc, TidyOmitOptionalTags );
Bool classic = cfgBool( doc, TidyVertSpace ); Bool classic = cfgBool( doc, TidyVertSpace );
uint contentIndent = indent; uint contentIndent = indent;

View file

@ -269,6 +269,11 @@ Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod )
{ return nodeIsMENU( tidyNodeToImpl(tnod) ); { return nodeIsMENU( tidyNodeToImpl(tnod) );
} }
/* HTML5
 * Public node predicate: converts the opaque TidyNode handle with
 * tidyNodeToImpl() and returns the result of nodeIsDATALIST() on it.
 */
Bool TIDY_CALL tidyNodeIsDATALIST( TidyNode tnod )
{ return nodeIsDATALIST( tidyNodeToImpl(tnod) );
}
/* /*
* local variables: * local variables:

View file

@ -17,11 +17,7 @@ static CheckAttribs CheckLINK;
static CheckAttribs CheckAREA; static CheckAttribs CheckAREA;
static CheckAttribs CheckTABLE; static CheckAttribs CheckTABLE;
static CheckAttribs CheckCaption; static CheckAttribs CheckCaption;
static CheckAttribs CheckSCRIPT;
static CheckAttribs CheckSTYLE;
static CheckAttribs CheckHTML; static CheckAttribs CheckHTML;
static CheckAttribs CheckFORM;
static CheckAttribs CheckMETA;
#define VERS_ELEM_A (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50) #define VERS_ELEM_A (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50)
#define VERS_ELEM_ABBR (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50) #define VERS_ELEM_ABBR (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50)
@ -128,16 +124,23 @@ static CheckAttribs CheckMETA;
#define VERS_ELEM_ARTICLE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_ARTICLE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_ASIDE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_ASIDE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_AUDIO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_AUDIO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_BDI (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_CANVAS (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_CANVAS (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_COMMAND (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_COMMAND (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_DATALIST (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_DATALIST (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_DETAILS (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_DETAILS (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_DIALOG (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_EMBED (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_FIGCAPTION (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_FIGCAPTION (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_FIGURE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_FIGURE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_FOOTER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_FOOTER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_HEADER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_HEADER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_HGROUP (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_HGROUP (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_KEYGEN (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_MAIN (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_MARK (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_MARK (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_MENUITEM (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_KEYGEN (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_METER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_METER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_NAV (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_NAV (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_OUTPUT (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_OUTPUT (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
@ -148,16 +151,17 @@ static CheckAttribs CheckMETA;
#define VERS_ELEM_TIME (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_TIME (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_TRACK (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_TRACK (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_VIDEO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_VIDEO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_WBR (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
static const Dict tag_defs[] = static const Dict tag_defs[] =
{ {
{ TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL }, { TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL },
/* W3C defined elements */ /* W3C defined elements */
{ TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseBlock), NULL },
{ TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseInline), NULL }, { TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_APPLET, "applet", VERS_ELEM_APPLET, &TY_(W3CAttrsFor_APPLET)[0], (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL }, { TidyTag_APPLET, "applet", VERS_ELEM_APPLET, &TY_(W3CAttrsFor_APPLET)[0], (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
{ TidyTag_AREA, "area", VERS_ELEM_AREA, &TY_(W3CAttrsFor_AREA)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), CheckAREA }, { TidyTag_AREA, "area", VERS_ELEM_AREA, &TY_(W3CAttrsFor_AREA)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), CheckAREA },
{ TidyTag_B, "b", VERS_ELEM_B, &TY_(W3CAttrsFor_B)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_B, "b", VERS_ELEM_B, &TY_(W3CAttrsFor_B)[0], (CM_INLINE), TY_(ParseInline), NULL },
@ -185,7 +189,8 @@ static const Dict tag_defs[] =
{ TidyTag_EM, "em", VERS_ELEM_EM, &TY_(W3CAttrsFor_EM)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_EM, "em", VERS_ELEM_EM, &TY_(W3CAttrsFor_EM)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_FIELDSET, "fieldset", VERS_ELEM_FIELDSET, &TY_(W3CAttrsFor_FIELDSET)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_FIELDSET, "fieldset", VERS_ELEM_FIELDSET, &TY_(W3CAttrsFor_FIELDSET)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_FONT, "font", VERS_ELEM_FONT, &TY_(W3CAttrsFor_FONT)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_FONT, "font", VERS_ELEM_FONT, &TY_(W3CAttrsFor_FONT)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_FORM, "form", VERS_ELEM_FORM, &TY_(W3CAttrsFor_FORM)[0], (CM_BLOCK), TY_(ParseBlock), CheckFORM }, /* HTML5 Form Elements has several new elements and attributes - datalist keygen output */
{ TidyTag_FORM, "form", VERS_ELEM_FORM, &TY_(W3CAttrsFor_FORM)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_FRAME, "frame", VERS_ELEM_FRAME, &TY_(W3CAttrsFor_FRAME)[0], (CM_FRAMES|CM_EMPTY), TY_(ParseEmpty), NULL }, { TidyTag_FRAME, "frame", VERS_ELEM_FRAME, &TY_(W3CAttrsFor_FRAME)[0], (CM_FRAMES|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_FRAMESET, "frameset", VERS_ELEM_FRAMESET, &TY_(W3CAttrsFor_FRAMESET)[0], (CM_HTML|CM_FRAMES), TY_(ParseFrameSet), NULL }, { TidyTag_FRAMESET, "frameset", VERS_ELEM_FRAMESET, &TY_(W3CAttrsFor_FRAMESET)[0], (CM_HTML|CM_FRAMES), TY_(ParseFrameSet), NULL },
{ TidyTag_H1, "h1", VERS_ELEM_H1, &TY_(W3CAttrsFor_H1)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL }, { TidyTag_H1, "h1", VERS_ELEM_H1, &TY_(W3CAttrsFor_H1)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
@ -207,13 +212,13 @@ static const Dict tag_defs[] =
{ TidyTag_LABEL, "label", VERS_ELEM_LABEL, &TY_(W3CAttrsFor_LABEL)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_LABEL, "label", VERS_ELEM_LABEL, &TY_(W3CAttrsFor_LABEL)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_LEGEND, "legend", VERS_ELEM_LEGEND, &TY_(W3CAttrsFor_LEGEND)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_LEGEND, "legend", VERS_ELEM_LEGEND, &TY_(W3CAttrsFor_LEGEND)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_LI, "li", VERS_ELEM_LI, &TY_(W3CAttrsFor_LI)[0], (CM_LIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL }, { TidyTag_LI, "li", VERS_ELEM_LI, &TY_(W3CAttrsFor_LI)[0], (CM_LIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
{ TidyTag_LINK, "link", VERS_ELEM_LINK, &TY_(W3CAttrsFor_LINK)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckLINK }, { TidyTag_LINK, "link", VERS_ELEM_LINK, &TY_(W3CAttrsFor_LINK)[0], (CM_HEAD|CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), CheckLINK },
{ TidyTag_LISTING, "listing", VERS_ELEM_LISTING, &TY_(W3CAttrsFor_LISTING)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL }, { TidyTag_LISTING, "listing", VERS_ELEM_LISTING, &TY_(W3CAttrsFor_LISTING)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL },
{ TidyTag_MAP, "map", VERS_ELEM_MAP, &TY_(W3CAttrsFor_MAP)[0], (CM_INLINE), TY_(ParseBlock), NULL }, { TidyTag_MAP, "map", VERS_ELEM_MAP, &TY_(W3CAttrsFor_MAP)[0], (CM_INLINE), TY_(ParseBlock), NULL },
{ TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL }, // { TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL },
{ TidyTag_META, "meta", VERS_ELEM_META, &TY_(W3CAttrsFor_META)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckMETA }, { TidyTag_META, "meta", VERS_ELEM_META, &TY_(W3CAttrsFor_META)[0], (CM_HEAD|CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_NOFRAMES, "noframes", VERS_ELEM_NOFRAMES, &TY_(W3CAttrsFor_NOFRAMES)[0], (CM_BLOCK|CM_FRAMES), TY_(ParseNoFrames), NULL }, { TidyTag_NOFRAMES, "noframes", VERS_ELEM_NOFRAMES, &TY_(W3CAttrsFor_NOFRAMES)[0], (CM_BLOCK|CM_FRAMES), TY_(ParseNoFrames), NULL },
{ TidyTag_NOSCRIPT, "noscript", VERS_ELEM_NOSCRIPT, &TY_(W3CAttrsFor_NOSCRIPT)[0], (CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL }, { TidyTag_NOSCRIPT, "noscript", VERS_ELEM_NOSCRIPT, &TY_(W3CAttrsFor_NOSCRIPT)[0], (CM_HEAD|CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL },
{ TidyTag_OBJECT, "object", VERS_ELEM_OBJECT, &TY_(W3CAttrsFor_OBJECT)[0], (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL }, { TidyTag_OBJECT, "object", VERS_ELEM_OBJECT, &TY_(W3CAttrsFor_OBJECT)[0], (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
{ TidyTag_OL, "ol", VERS_ELEM_OL, &TY_(W3CAttrsFor_OL)[0], (CM_BLOCK), TY_(ParseList), NULL }, { TidyTag_OL, "ol", VERS_ELEM_OL, &TY_(W3CAttrsFor_OL)[0], (CM_BLOCK), TY_(ParseList), NULL },
{ TidyTag_OPTGROUP, "optgroup", VERS_ELEM_OPTGROUP, &TY_(W3CAttrsFor_OPTGROUP)[0], (CM_FIELD|CM_OPT), TY_(ParseOptGroup), NULL }, { TidyTag_OPTGROUP, "optgroup", VERS_ELEM_OPTGROUP, &TY_(W3CAttrsFor_OPTGROUP)[0], (CM_FIELD|CM_OPT), TY_(ParseOptGroup), NULL },
@ -231,13 +236,13 @@ static const Dict tag_defs[] =
{ TidyTag_RUBY, "ruby", VERS_ELEM_RUBY, &TY_(W3CAttrsFor_RUBY)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_RUBY, "ruby", VERS_ELEM_RUBY, &TY_(W3CAttrsFor_RUBY)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_S, "s", VERS_ELEM_S, &TY_(W3CAttrsFor_S)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_S, "s", VERS_ELEM_S, &TY_(W3CAttrsFor_S)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_SAMP, "samp", VERS_ELEM_SAMP, &TY_(W3CAttrsFor_SAMP)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_SAMP, "samp", VERS_ELEM_SAMP, &TY_(W3CAttrsFor_SAMP)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_SCRIPT, "script", VERS_ELEM_SCRIPT, &TY_(W3CAttrsFor_SCRIPT)[0], (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), CheckSCRIPT }, { TidyTag_SCRIPT, "script", VERS_ELEM_SCRIPT, &TY_(W3CAttrsFor_SCRIPT)[0], (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), NULL },
{ TidyTag_SELECT, "select", VERS_ELEM_SELECT, &TY_(W3CAttrsFor_SELECT)[0], (CM_INLINE|CM_FIELD), TY_(ParseSelect), NULL }, { TidyTag_SELECT, "select", VERS_ELEM_SELECT, &TY_(W3CAttrsFor_SELECT)[0], (CM_INLINE|CM_FIELD), TY_(ParseSelect), NULL },
{ TidyTag_SMALL, "small", VERS_ELEM_SMALL, &TY_(W3CAttrsFor_SMALL)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_SMALL, "small", VERS_ELEM_SMALL, &TY_(W3CAttrsFor_SMALL)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_SPAN, "span", VERS_ELEM_SPAN, &TY_(W3CAttrsFor_SPAN)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_SPAN, "span", VERS_ELEM_SPAN, &TY_(W3CAttrsFor_SPAN)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_STRIKE, "strike", VERS_ELEM_STRIKE, &TY_(W3CAttrsFor_STRIKE)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_STRIKE, "strike", VERS_ELEM_STRIKE, &TY_(W3CAttrsFor_STRIKE)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_STRONG, "strong", VERS_ELEM_STRONG, &TY_(W3CAttrsFor_STRONG)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_STRONG, "strong", VERS_ELEM_STRONG, &TY_(W3CAttrsFor_STRONG)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_STYLE, "style", VERS_ELEM_STYLE, &TY_(W3CAttrsFor_STYLE)[0], (CM_HEAD), TY_(ParseScript), CheckSTYLE }, { TidyTag_STYLE, "style", VERS_ELEM_STYLE, &TY_(W3CAttrsFor_STYLE)[0], (CM_HEAD|CM_BLOCK), TY_(ParseScript), NULL },
{ TidyTag_SUB, "sub", VERS_ELEM_SUB, &TY_(W3CAttrsFor_SUB)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_SUB, "sub", VERS_ELEM_SUB, &TY_(W3CAttrsFor_SUB)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_SUP, "sup", VERS_ELEM_SUP, &TY_(W3CAttrsFor_SUP)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_SUP, "sup", VERS_ELEM_SUP, &TY_(W3CAttrsFor_SUP)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_TABLE, "table", VERS_ELEM_TABLE, &TY_(W3CAttrsFor_TABLE)[0], (CM_BLOCK), TY_(ParseTableTag), CheckTABLE }, { TidyTag_TABLE, "table", VERS_ELEM_TABLE, &TY_(W3CAttrsFor_TABLE)[0], (CM_BLOCK), TY_(ParseTableTag), CheckTABLE },
@ -261,9 +266,7 @@ static const Dict tag_defs[] =
{ TidyTag_BGSOUND, "bgsound", VERS_MICROSOFT, NULL, (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL }, { TidyTag_BGSOUND, "bgsound", VERS_MICROSOFT, NULL, (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_BLINK, "blink", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_BLINK, "blink", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_COMMENT, "comment", VERS_MICROSOFT, NULL, (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_COMMENT, "comment", VERS_MICROSOFT, NULL, (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_EMBED, "embed", VERS_NETSCAPE, NULL, (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_ILAYER, "ilayer", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_ILAYER, "ilayer", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_KEYGEN, "keygen", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_LAYER, "layer", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_LAYER, "layer", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_MARQUEE, "marquee", VERS_MICROSOFT, NULL, (CM_INLINE|CM_OPT), TY_(ParseInline), NULL }, { TidyTag_MARQUEE, "marquee", VERS_MICROSOFT, NULL, (CM_INLINE|CM_OPT), TY_(ParseInline), NULL },
{ TidyTag_MULTICOL, "multicol", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_MULTICOL, "multicol", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
@ -274,32 +277,40 @@ static const Dict tag_defs[] =
{ TidyTag_SERVER, "server", VERS_NETSCAPE, NULL, (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), NULL }, { TidyTag_SERVER, "server", VERS_NETSCAPE, NULL, (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), NULL },
{ TidyTag_SERVLET, "servlet", VERS_SUN, NULL, (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL }, { TidyTag_SERVLET, "servlet", VERS_SUN, NULL, (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
{ TidyTag_SPACER, "spacer", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, { TidyTag_SPACER, "spacer", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_WBR, "wbr", VERS_PROPRIETARY, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
/* "HTML5" */ /* HTML5 */
{ TidyTag_ARTICLE, "article", VERS_ELEM_ARTICLE, &TY_(W3CAttrsFor_ARTICLE)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_ARTICLE, "article", VERS_ELEM_ARTICLE, &TY_(W3CAttrsFor_ARTICLE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_ASIDE, "aside", VERS_ELEM_ASIDE, &TY_(W3CAttrsFor_ASIDE)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_ASIDE, "aside", VERS_ELEM_ASIDE, &TY_(W3CAttrsFor_ASIDE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_AUDIO, "audio", VERS_ELEM_AUDIO, &TY_(W3CAttrsFor_AUDIO)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_AUDIO, "audio", VERS_ELEM_AUDIO, &TY_(W3CAttrsFor_AUDIO)[0], (CM_BLOCK|CM_INLINE), TY_(ParseBlock), NULL },
{ TidyTag_BDI, "bdi", VERS_ELEM_BDI, &TY_(W3CAttrsFor_BDI)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_CANVAS, "canvas", VERS_ELEM_CANVAS, &TY_(W3CAttrsFor_CANVAS)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_CANVAS, "canvas", VERS_ELEM_CANVAS, &TY_(W3CAttrsFor_CANVAS)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_COMMAND, "command", VERS_ELEM_COMMAND, &TY_(W3CAttrsFor_COMMAND)[0], (CM_HEAD|CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, { TidyTag_COMMAND, "command", VERS_ELEM_COMMAND, &TY_(W3CAttrsFor_COMMAND)[0], (CM_HEAD|CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_DATALIST, "datalist", VERS_ELEM_DATALIST, &TY_(W3CAttrsFor_DATALIST)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_DATALIST, "datalist", VERS_ELEM_DATALIST, &TY_(W3CAttrsFor_DATALIST)[0], (CM_INLINE|CM_FIELD), TY_(ParseDatalist), NULL },
//{ TidyTag_DATALIST, "datalist", VERS_ELEM_DATALIST, &TY_(W3CAttrsFor_DATALIST)[0], (CM_FIELD), TY_(ParseInline), NULL },
{ TidyTag_DETAILS, "details", VERS_ELEM_DETAILS, &TY_(W3CAttrsFor_DETAILS)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_DETAILS, "details", VERS_ELEM_DETAILS, &TY_(W3CAttrsFor_DETAILS)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_DIALOG, "dialog", VERS_ELEM_DIALOG, &TY_(W3CAttrsFor_DIALOG)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_EMBED, "embed", VERS_ELEM_EMBED, &TY_(W3CAttrsFor_EMBED)[0], (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_FIGCAPTION, "figcaption", VERS_ELEM_FIGCAPTION, &TY_(W3CAttrsFor_FIGCAPTION)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_FIGCAPTION, "figcaption", VERS_ELEM_FIGCAPTION, &TY_(W3CAttrsFor_FIGCAPTION)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_FIGURE, "figure", VERS_ELEM_FIGURE, &TY_(W3CAttrsFor_FIGURE)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_FIGURE, "figure", VERS_ELEM_FIGURE, &TY_(W3CAttrsFor_FIGURE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_FOOTER, "footer", VERS_ELEM_FOOTER, &TY_(W3CAttrsFor_FOOTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_FOOTER, "footer", VERS_ELEM_FOOTER, &TY_(W3CAttrsFor_FOOTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_HEADER, "header", VERS_ELEM_HEADER, &TY_(W3CAttrsFor_HEADER)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_HEADER, "header", VERS_ELEM_HEADER, &TY_(W3CAttrsFor_HEADER)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_HGROUP, "hgroup", VERS_ELEM_HGROUP, &TY_(W3CAttrsFor_HGROUP)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_HGROUP, "hgroup", VERS_ELEM_HGROUP, &TY_(W3CAttrsFor_HGROUP)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_KEYGEN, "keygen", VERS_ELEM_KEYGEN, &TY_(W3CAttrsFor_KEYGEN)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
{ TidyTag_MAIN, "main", VERS_ELEM_MAIN, &TY_(W3CAttrsFor_MAIN)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_MARK, "mark", VERS_ELEM_MARK, &TY_(W3CAttrsFor_MARK)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_MARK, "mark", VERS_ELEM_MARK, &TY_(W3CAttrsFor_MARK)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_MENUITEM, "menuitem", VERS_ELEM_MENUITEM, &TY_(W3CAttrsFor_MENUITEM)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseInline), NULL },
{ TidyTag_METER, "meter", VERS_ELEM_METER, &TY_(W3CAttrsFor_METER)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_METER, "meter", VERS_ELEM_METER, &TY_(W3CAttrsFor_METER)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_NAV, "nav", VERS_ELEM_NAV, &TY_(W3CAttrsFor_NAV)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_NAV, "nav", VERS_ELEM_NAV, &TY_(W3CAttrsFor_NAV)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_OUTPUT, "output", VERS_ELEM_OUTPUT, &TY_(W3CAttrsFor_OUTPUT)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_OUTPUT, "output", VERS_ELEM_OUTPUT, &TY_(W3CAttrsFor_OUTPUT)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_PROGRESS, "progress", VERS_ELEM_PROGRESS, &TY_(W3CAttrsFor_PROGRESS)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_PROGRESS, "progress", VERS_ELEM_PROGRESS, &TY_(W3CAttrsFor_PROGRESS)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_SECTION, "section", VERS_ELEM_SECTION, &TY_(W3CAttrsFor_SECTION)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, { TidyTag_SECTION, "section", VERS_ELEM_SECTION, &TY_(W3CAttrsFor_SECTION)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
{ TidyTag_SOURCE, "source", VERS_ELEM_SOURCE, &TY_(W3CAttrsFor_SOURCE)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, { TidyTag_SOURCE, "source", VERS_ELEM_SOURCE, &TY_(W3CAttrsFor_SOURCE)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseBlock), NULL },
{ TidyTag_SUMMARY, "summary", VERS_ELEM_SUMMARY, &TY_(W3CAttrsFor_SUMMARY)[0], (CM_BLOCK), TY_(ParseInline), NULL }, { TidyTag_SUMMARY, "summary", VERS_ELEM_SUMMARY, &TY_(W3CAttrsFor_SUMMARY)[0], (CM_BLOCK), TY_(ParseInline), NULL },
{ TidyTag_TIME, "time", VERS_ELEM_TIME, &TY_(W3CAttrsFor_TIME)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_TIME, "time", VERS_ELEM_TIME, &TY_(W3CAttrsFor_TIME)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_TRACK, "track", VERS_ELEM_TRACK, &TY_(W3CAttrsFor_TRACK)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL }, { TidyTag_TRACK, "track", VERS_ELEM_TRACK, &TY_(W3CAttrsFor_TRACK)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseBlock), NULL },
{ TidyTag_VIDEO, "video", VERS_ELEM_VIDEO, &TY_(W3CAttrsFor_VIDEO)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_VIDEO, "video", VERS_ELEM_VIDEO, &TY_(W3CAttrsFor_VIDEO)[0], (CM_BLOCK|CM_INLINE), TY_(ParseBlock), NULL },
{ TidyTag_WBR, "wbr", VERS_ELEM_WBR, &TY_(W3CAttrsFor_WBR)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
/* this must be the final entry */ /* this must be the final entry */
{ (TidyTagId)0, NULL, 0, NULL, (0), NULL, NULL } { (TidyTagId)0, NULL, 0, NULL, (0), NULL, NULL }
@ -760,21 +771,9 @@ void CheckAREA( TidyDocImpl* doc, Node *node )
void CheckTABLE( TidyDocImpl* doc, Node *node ) void CheckTABLE( TidyDocImpl* doc, Node *node )
{ {
AttVal* attval; AttVal* attval;
Bool HasSummary = TY_(AttrGetById)(node, TidyAttr_SUMMARY) != NULL;
TY_(CheckAttributes)(doc, node); TY_(CheckAttributes)(doc, node);
/* a missing summary attribute is bad accessibility, no matter
what HTML version is involved; a document without is valid */
if (cfg(doc, TidyAccessibilityCheckLevel) == 0)
{
if (!HasSummary)
{
doc->badAccess |= BA_MISSING_SUMMARY;
TY_(ReportMissingAttr)( doc, node, "summary");
}
}
/* convert <table border> to <table border="1"> */ /* convert <table border> to <table border="1"> */
if ( cfgBool(doc, TidyXmlOut) && (attval = TY_(AttrGetById)(node, TidyAttr_BORDER)) ) if ( cfgBool(doc, TidyXmlOut) && (attval = TY_(AttrGetById)(node, TidyAttr_BORDER)) )
{ {
@ -783,115 +782,24 @@ void CheckTABLE( TidyDocImpl* doc, Node *node )
} }
} }
/* add missing type attribute when appropriate */ /* report missing href attribute; report missing rel attribute */
/* Attribute checker for <script>: runs the generic attribute checks and,
** when no "type" attribute is present, adds one derived from the
** "language" attribute (or "text/javascript" when there is no "language"
** attribute), then reports the outcome.
**
** doc  - document the node belongs to; passed to the add/report helpers
** node - the script element being checked
*/
void CheckSCRIPT( TidyDocImpl* doc, Node *node )
{
AttVal *lang, *type;
char buf[16];
TY_(CheckAttributes)(doc, node);
lang = TY_(AttrGetById)(node, TidyAttr_LANGUAGE);
type = TY_(AttrGetById)(node, TidyAttr_TYPE);
/* nothing more to do when the element already carries a type attribute */
if (!type)
{
/* check for javascript */
if (lang)
{
/* Test #696799. lang->value can be NULL. */
/* buf is pre-terminated so it holds an empty string if the copy below
   writes nothing -- presumably tmbstrncpy tolerates a NULL source;
   TODO confirm against its definition */
buf[0] = '\0';
TY_(tmbstrncpy)(buf, lang->value, sizeof(buf));
/* cut the copy to at most 10 characters, the length of "javascript";
   anything after that in the language value is ignored below */
buf[10] = '\0';
if (TY_(tmbstrncasecmp)(buf, "javascript", 10) == 0 ||
TY_(tmbstrncasecmp)(buf, "jscript", 7) == 0)
{
TY_(AddAttribute)(doc, node, "type", "text/javascript");
}
else if (TY_(tmbstrcasecmp)(buf, "vbscript") == 0)
{
/* per Randy Waki 8/6/01 */
TY_(AddAttribute)(doc, node, "type", "text/vbscript");
}
/* any other language value: no type attribute is added here */
}
else
{
TY_(AddAttribute)(doc, node, "type", "text/javascript");
}
/* look the attribute up again to see whether one was added above */
type = TY_(AttrGetById)(node, TidyAttr_TYPE);
if (type != NULL)
{
TY_(ReportAttrError)(doc, node, type, INSERTING_ATTRIBUTE);
}
else
{
/* unrecognised language value: type is still absent, so report it */
TY_(ReportMissingAttr)(doc, node, "type");
}
}
}
/* Attribute checker for <style>: runs the generic attribute checks and,
** when the "type" attribute is absent, has a NULL value, or has a
** zero-length value, calls RepairAttrValue with "text/css" and reports
** INSERTING_ATTRIBUTE.
**
** doc  - document the node belongs to
** node - the style element being checked
*/
void CheckSTYLE( TidyDocImpl* doc, Node *node )
{
/* fetched before the generic checks run */
AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
TY_(CheckAttributes)( doc, node );
if ( !type || !type->value || !TY_(tmbstrlen)(type->value) )
{
/* the value returned by RepairAttrValue is what gets reported */
type = TY_(RepairAttrValue)(doc, node, "type", "text/css");
TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
}
}
/* add missing type attribute when appropriate */
void CheckLINK( TidyDocImpl* doc, Node *node ) void CheckLINK( TidyDocImpl* doc, Node *node )
{ {
AttVal *rel = TY_(AttrGetById)(node, TidyAttr_REL); Bool HasHref = TY_(AttrGetById)(node, TidyAttr_HREF) != NULL;
Bool HasRel = TY_(AttrGetById)(node, TidyAttr_REL) != NULL;
Bool HasItemprop = TY_(AttrGetById)(node, TidyAttr_ITEMPROP) != NULL;
TY_(CheckAttributes)( doc, node ); if (!HasHref)
/* todo: <link rel="alternate stylesheet"> */
if (AttrValueIs(rel, "stylesheet"))
{ {
AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE); TY_(ReportMissingAttr)( doc, node, "href" );
if (!type)
{
TY_(AddAttribute)( doc, node, "type", "text/css" );
type = TY_(AttrGetById)(node, TidyAttr_TYPE);
TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
}
}
} }
/* reports missing action attribute */ if (!HasItemprop && !HasRel)
void CheckFORM( TidyDocImpl* doc, Node *node )
{ {
AttVal *action = TY_(AttrGetById)(node, TidyAttr_ACTION); TY_(ReportMissingAttr)( doc, node, "rel" );
TY_(CheckAttributes)(doc, node);
if (!action)
TY_(ReportMissingAttr)(doc, node, "action");
} }
/* reports missing content attribute */
void CheckMETA( TidyDocImpl* doc, Node *node )
{
AttVal *content = TY_(AttrGetById)(node, TidyAttr_CONTENT);
TY_(CheckAttributes)(doc, node);
if (!content)
TY_(ReportMissingAttr)( doc, node, "content" );
/* name or http-equiv attribute must also be set */
} }
Bool TY_(nodeIsText)( Node* node ) Bool TY_(nodeIsText)( Node* node )
{ {
return ( node && node->type == TextNode ); return ( node && node->type == TextNode );

View file

@ -110,6 +110,7 @@ Parser TY_(ParseRow);
Parser TY_(ParseSelect); Parser TY_(ParseSelect);
Parser TY_(ParseOptGroup); Parser TY_(ParseOptGroup);
Parser TY_(ParseText); Parser TY_(ParseText);
Parser TY_(ParseDatalist);
CheckAttribs TY_(CheckAttributes); CheckAttribs TY_(CheckAttributes);
@ -224,6 +225,11 @@ uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */
#define nodeIsU( node ) TagIsId( node, TidyTag_U ) #define nodeIsU( node ) TagIsId( node, TidyTag_U )
#define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU ) #define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU )
#define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON ) #define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON )
#define nodeIsCANVAS( node ) TagIsId( node, TidyTag_CANVAS )
#define nodeIsPROGRESS( node ) TagIsId( node, TidyTag_PROGRESS )
/* HTML5 */
#define nodeIsDATALIST( node ) TagIsId( node, TidyTag_DATALIST )
#endif /* __TAGS_H__ */ #endif /* __TAGS_H__ */

View file

@ -23,6 +23,7 @@
#include "tidy-int.h" #include "tidy-int.h"
#include "parser.h" #include "parser.h"
#include "clean.h" #include "clean.h"
#include "gdoc.h"
#include "config.h" #include "config.h"
#include "message.h" #include "message.h"
#include "pprint.h" #include "pprint.h"
@ -1227,11 +1228,63 @@ int tidyDocRunDiagnostics( TidyDocImpl* doc )
return tidyDocStatus( doc ); return tidyDocStatus( doc );
} }
/* Table of element names paired with their tag ids, scanned by
** inRemovedInfo(). An element whose tag id appears here is reported with
** REMOVED_HTML5 by CheckHTML5.
*/
static struct _html5Info
{
const char *tag; /* element name; a null pointer marks the end of the table */
uint id; /* tag id compared against the id passed to inRemovedInfo() */
} const html5Info[] = {
{"acronym", TidyTag_ACRONYM},
{"applet", TidyTag_APPLET },
{"basefont",TidyTag_BASEFONT },
{ "big", TidyTag_BIG },
{ "center", TidyTag_CENTER },
{ "dir", TidyTag_DIR },
{ "font", TidyTag_FONT },
{ "frame", TidyTag_FRAME},
{ "frameset", TidyTag_FRAMESET},
{ "noframes", TidyTag_NOFRAMES },
{ "strike", TidyTag_STRIKE },
{ "tt", TidyTag_TT },
/* terminator: inRemovedInfo() stops at the first entry with a null tag */
{ 0, 0 }
};
/* Report whether tag id `tid` is listed in the html5Info table.
**
** tid - tag id to look up
**
** Returns yes when some table entry has this id, no otherwise.
*/
Bool inRemovedInfo( uint tid )
{
    const struct _html5Info *entry;

    /* the table is closed by an entry whose tag pointer is null */
    for (entry = html5Info; entry->tag != 0; ++entry)
    {
        if (entry->id == tid)
            return yes;
    }
    return no;
}
/* Walk the sibling list starting at `node`, recursing into each node's
** content, and issue a REMOVED_HTML5 warning for every element node that
** has a tag definition and either
**   - does not have the VERS_HTML5 bit set in its tag's version mask, or
**   - is listed in the html5Info table (see inRemovedInfo()).
**
** doc  - document being checked; passed through to ReportWarning
** node - first node of the list to examine; may be NULL (nothing is done)
*/
void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
{
    while (node)
    {
        if ( TY_(nodeIsElement)(node) && node->tag )
        {
            /* '!' binds tighter than '&': the mask test has to be
               parenthesized before it is negated, otherwise the
               expression is (!versions) & VERS_HTML5, which is 0 for
               every tag with a non-zero version mask */
            if ( !(node->tag->versions & VERS_HTML5) ||
                 inRemovedInfo(node->tag->id) )
            {
                /* issue warning */
                TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
            }
        }
        if (node->content)
            TY_(CheckHTML5)( doc, node->content );
        node = node->next;
    }
}
int tidyDocCleanAndRepair( TidyDocImpl* doc ) int tidyDocCleanAndRepair( TidyDocImpl* doc )
{ {
Bool word2K = cfgBool( doc, TidyWord2000 ); Bool word2K = cfgBool( doc, TidyWord2000 );
Bool logical = cfgBool( doc, TidyLogicalEmphasis ); Bool logical = cfgBool( doc, TidyLogicalEmphasis );
Bool clean = cfgBool( doc, TidyMakeClean ); Bool clean = cfgBool( doc, TidyMakeClean );
Bool gdoc = cfgBool( doc, TidyGDocClean );
Bool dropFont = cfgBool( doc, TidyDropFontTags ); Bool dropFont = cfgBool( doc, TidyDropFontTags );
Bool htmlOut = cfgBool( doc, TidyHtmlOut ); Bool htmlOut = cfgBool( doc, TidyHtmlOut );
Bool xmlOut = cfgBool( doc, TidyXmlOut ); Bool xmlOut = cfgBool( doc, TidyXmlOut );
@ -1240,13 +1293,16 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool tidyMark = cfgBool( doc, TidyMark ); Bool tidyMark = cfgBool( doc, TidyMark );
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags ); Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName ); Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
ctmbstr sdef = NULL;
Node* node; Node* node;
if (tidyXmlTags) if (tidyXmlTags)
return tidyDocStatus( doc ); return tidyDocStatus( doc );
/* simplifies <b><b> ... </b> ...</b> etc. */ /* simplifies <b><b> ... </b> ...</b> etc. */
TY_(NestedEmphasis)( doc, &doc->root ); if ( mergeEmphasis )
TY_(NestedEmphasis)( doc, &doc->root );
/* cleans up <dir>indented text</dir> etc. */ /* cleans up <dir>indented text</dir> etc. */
TY_(List2BQ)( doc, &doc->root ); TY_(List2BQ)( doc, &doc->root );
@ -1270,6 +1326,10 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if ( clean || dropFont ) if ( clean || dropFont )
TY_(CleanDocument)( doc ); TY_(CleanDocument)( doc );
/* clean up html exported by Google Docs */
if ( gdoc )
TY_(CleanGoogleDocument)( doc );
/* Move terminating <br /> tags from out of paragraphs */ /* Move terminating <br /> tags from out of paragraphs */
/*! Do we want to do this for all block-level elements? */ /*! Do we want to do this for all block-level elements? */
@ -1291,6 +1351,12 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
/* remember given doctype for reporting */ /* remember given doctype for reporting */
node = TY_(FindDocType)(doc); node = TY_(FindDocType)(doc);
sdef = tidyOptGetValue((TidyDoc)doc, TidyDoctype );
if (!sdef)
sdef = tidyOptGetCurrPick((TidyDoc) doc, TidyDoctypeMode );
if (sdef && (strcmp(sdef,"html5") == 0)) {
TY_(CheckHTML5)( doc, &doc->root );
}
if (node) if (node)
{ {
AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC"); AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
@ -1388,7 +1454,6 @@ int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
{ {
/* noop */ /* noop */
TY_(DropFontElements)(doc, &doc->root, NULL); TY_(DropFontElements)(doc, &doc->root, NULL);
TY_(WbrToSpace)(doc, &doc->root);
} }
if ((makeClean && asciiChars) || makeBare) if ((makeClean && asciiChars) || makeBare)