main code updates to do HTML5

This commit is contained in:
Geoff McLane 2014-08-03 20:33:29 +02:00
parent 292145c8e2
commit 78c0080eb8
18 changed files with 1999 additions and 1259 deletions

View file

@ -9,6 +9,13 @@
*/ */
#include "tidy.h" #include "tidy.h"
#ifdef _MSC_VER
#include "sprtf.h"
#endif
#ifndef SPRTF
#define SPRTF printf
#endif
static FILE* errout = NULL; /* set to stderr */ static FILE* errout = NULL; /* set to stderr */
/* static FILE* txtout = NULL; */ /* set to stdout */ /* static FILE* txtout = NULL; */ /* set to stdout */
@ -176,6 +183,9 @@ static const CmdOptDesc cmdopt_defs[] = {
{ "-bare", { "-bare",
"strip out smart quotes and em dashes, etc.", "strip out smart quotes and em dashes, etc.",
"bare: yes", CmdOptProcDir, "-b" }, "bare: yes", CmdOptProcDir, "-b" },
{ "-gdoc",
"produce clean version of html exported by google docs",
"gdoc: yes", CmdOptProcDir, "-g" },
{ "-numeric", { "-numeric",
"output numeric rather than named entities", "output numeric rather than named entities",
"numeric-entities: yes", CmdOptProcDir, "-n" }, "numeric-entities: yes", CmdOptProcDir, "-n" },
@ -186,8 +196,8 @@ static const CmdOptDesc cmdopt_defs[] = {
"suppress nonessential output", "suppress nonessential output",
"quiet: yes", CmdOptProcDir, "-q" }, "quiet: yes", CmdOptProcDir, "-q" },
{ "-omit", { "-omit",
"omit optional end tags", "omit optional start tags and end tags",
"hide-endtags: yes", CmdOptProcDir }, "omit-optional-tags: yes", CmdOptProcDir },
{ "-xml", { "-xml",
"specify the input is well formed XML", "specify the input is well formed XML",
"input-xml: yes", CmdOptProcDir }, "input-xml: yes", CmdOptProcDir },
@ -411,14 +421,16 @@ static void help( ctmbstr prog )
{ {
printf( "%s [option...] [file...] [option...] [file...]\n", prog ); printf( "%s [option...] [file...] [option...] [file...]\n", prog );
printf( "Utility to clean up and pretty print HTML/XHTML/XML\n"); printf( "Utility to clean up and pretty print HTML/XHTML/XML\n");
printf( "See http://tidy.sourceforge.net/\n"); printf( "\n");
printf( "This is an HTML5-aware experimental fork of HTML Tidy.\n");
printf( "%s\n", tidyReleaseDate() );
printf( "\n"); printf( "\n");
#ifdef PLATFORM_NAME #ifdef PLATFORM_NAME
printf( "Options for HTML Tidy for %s released on %s:\n", printf( "Options for HTML Tidy for %s:\n", PLATFORM_NAME );
PLATFORM_NAME, tidyReleaseDate() );
#else #else
printf( "Options for HTML Tidy released on %s:\n", tidyReleaseDate() ); printf( "Options for HTML Tidy:\n");
#endif #endif
printf( "\n"); printf( "\n");
@ -429,9 +441,27 @@ static void help( ctmbstr prog )
"to the man page.\n\n"); "to the man page.\n\n");
printf( "Input/Output default to stdin/stdout respectively.\n"); printf( "Input/Output default to stdin/stdout respectively.\n");
printf( "\n");
printf( "Single letter options apart from -f may be combined\n"); printf( "Single letter options apart from -f may be combined\n");
printf( "as in: tidy -f errs.txt -imu foo.html\n"); printf( "as in: tidy -f errs.txt -imu foo.html\n");
printf( "For further info on HTML see http://www.w3.org/MarkUp\n"); printf( "\n");
printf( "For more information on this HTML5-aware experimental fork of Tidy,\n" );
printf( "see http://w3c.github.com/tidy-html5/\n" );
printf( "\n");
printf( "For more information on HTML, see the following:\n" );
printf( "\n");
printf( " HTML: Edition for Web Authors (the latest HTML specification)\n");
printf( " http://dev.w3.org/html5/spec-author-view\n" );
printf( "\n");
printf( " HTML: The Markup Language (an HTML language reference)\n" );
printf( " http://dev.w3.org/html5/markup/\n" );
printf( "\n");
printf( "File bug reports at https://github.com/w3c/tidy-html5/issues/\n" );
printf( "or send questions and comments to html-tidy@w3.org\n" );
printf( "\n");
printf( "Validate your HTML documents using the W3C Nu Markup Validator:\n" );
printf( "\n");
printf( " http://validator.w3.org/nu/" );
printf( "\n"); printf( "\n");
} }
@ -472,6 +502,7 @@ ctmbstr ConfigCategoryName( TidyConfigCategory id )
fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", (int)id); fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", (int)id);
assert(0); assert(0);
abort(); abort();
return "never_here"; /* only for the compiler warning */
} }
/* Description of an option */ /* Description of an option */
@ -898,10 +929,10 @@ static void optionvalues( TidyDoc tdoc )
static void version( void ) static void version( void )
{ {
#ifdef PLATFORM_NAME #ifdef PLATFORM_NAME
printf( "HTML Tidy for %s released on %s\n", printf( "HTML Tidy for HTML5 for %s %s\n",
PLATFORM_NAME, tidyReleaseDate() ); PLATFORM_NAME, tidyReleaseDate() );
#else #else
printf( "HTML Tidy released on %s\n", tidyReleaseDate() ); printf( "HTML Tidy for HTML5 %s\n", tidyReleaseDate() );
#endif #endif
} }
@ -923,6 +954,9 @@ int main( int argc, char** argv )
errout = stderr; /* initialize to stderr */ errout = stderr; /* initialize to stderr */
status = 0; status = 0;
#ifdef _MSC_VER
set_log_file((char *)"temptidy.txt", 0);
#endif
#ifdef TIDY_CONFIG_FILE #ifdef TIDY_CONFIG_FILE
if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) ) if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
@ -977,7 +1011,7 @@ int main( int argc, char** argv )
tidyOptResetToDefault( tdoc, TidyIndentSpaces ); tidyOptResetToDefault( tdoc, TidyIndentSpaces );
} }
else if ( strcasecmp(arg, "omit") == 0 ) else if ( strcasecmp(arg, "omit") == 0 )
tidyOptSetBool( tdoc, TidyHideEndTags, yes ); tidyOptSetBool( tdoc, TidyOmitOptionalTags, yes );
else if ( strcasecmp(arg, "upper") == 0 ) else if ( strcasecmp(arg, "upper") == 0 )
tidyOptSetBool( tdoc, TidyUpperCaseTags, yes ); tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
@ -985,6 +1019,9 @@ int main( int argc, char** argv )
else if ( strcasecmp(arg, "clean") == 0 ) else if ( strcasecmp(arg, "clean") == 0 )
tidyOptSetBool( tdoc, TidyMakeClean, yes ); tidyOptSetBool( tdoc, TidyMakeClean, yes );
else if ( strcasecmp(arg, "gdoc") == 0 )
tidyOptSetBool( tdoc, TidyGDocClean, yes );
else if ( strcasecmp(arg, "bare") == 0 ) else if ( strcasecmp(arg, "bare") == 0 )
tidyOptSetBool( tdoc, TidyMakeBare, yes ); tidyOptSetBool( tdoc, TidyMakeBare, yes );
@ -1202,6 +1239,10 @@ int main( int argc, char** argv )
tidyOptSetBool( tdoc, TidyMakeClean, yes ); tidyOptSetBool( tdoc, TidyMakeClean, yes );
break; break;
case 'g':
tidyOptSetBool( tdoc, TidyGDocClean, yes );
break;
case 'b': case 'b':
tidyOptSetBool( tdoc, TidyMakeBare, yes ); tidyOptSetBool( tdoc, TidyMakeBare, yes );
break; break;
@ -1237,6 +1278,7 @@ int main( int argc, char** argv )
if ( argc > 1 ) if ( argc > 1 )
{ {
htmlfil = argv[1]; htmlfil = argv[1];
SPRTF("Tidying '%s'\n", htmlfil);
if ( tidyOptGetBool(tdoc, TidyEmacs) ) if ( tidyOptGetBool(tdoc, TidyEmacs) )
tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil ); tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
status = tidyParseFile( tdoc, htmlfil ); status = tidyParseFile( tdoc, htmlfil );
@ -1263,10 +1305,17 @@ int main( int argc, char** argv )
else else
{ {
ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile ); ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
if ( outfil ) if ( outfil ) {
status = tidySaveFile( tdoc, outfil ); status = tidySaveFile( tdoc, outfil );
else } else {
#if !defined(NDEBUG) && defined(_MSC_VER)
static char tmp_buf[264];
sprintf(tmp_buf,"%s.html",get_log_file());
status = tidySaveFile( tdoc, tmp_buf );
#else
status = tidySaveStdout( tdoc ); status = tidySaveStdout( tdoc );
#endif
}
} }
} }

View file

@ -937,6 +937,10 @@ TIDY_EXPORT Bool TIDY_CALL tidyNodeIsSTRIKE( TidyNode tnod );
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsU( TidyNode tnod ); TIDY_EXPORT Bool TIDY_CALL tidyNodeIsU( TidyNode tnod );
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod ); TIDY_EXPORT Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod );
/* HTML5 */
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsDATALIST( TidyNode tnod ); // bit like OPTIONS
/** @} End NodeIsElementName group */ /** @} End NodeIsElementName group */
/** @} End NodeAsk group */ /** @} End NodeAsk group */

View file

@ -102,11 +102,14 @@ typedef enum
TidyOutFile, /**< File name to write markup to */ TidyOutFile, /**< File name to write markup to */
TidyWriteBack, /**< If true then output tidied markup */ TidyWriteBack, /**< If true then output tidied markup */
TidyShowMarkup, /**< If false, normal output is suppressed */ TidyShowMarkup, /**< If false, normal output is suppressed */
TidyShowInfo, /**< If true, info-level messages are shown */
TidyShowWarnings, /**< However errors are always shown */ TidyShowWarnings, /**< However errors are always shown */
TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */ TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */
TidyIndentContent, /**< Indent content of appropriate tags */ TidyIndentContent, /**< Indent content of appropriate tags */
/**< "auto" does text/block level content indentation */ /**< "auto" does text/block level content indentation */
TidyHideEndTags, /**< Suppress optional end tags */ TidyCoerceEndTags, /**< Coerce end tags from start tags where probably intended */
TidyOmitOptionalTags,/**< Suppress optional start tags and end tags */
TidyHideEndTags, /**< Legacy name for TidyOmitOptionalTags */
TidyXmlTags, /**< Treat input as XML */ TidyXmlTags, /**< Treat input as XML */
TidyXmlOut, /**< Create output as XML */ TidyXmlOut, /**< Create output as XML */
TidyXhtmlOut, /**< Output extensible HTML */ TidyXhtmlOut, /**< Output extensible HTML */
@ -117,9 +120,11 @@ typedef enum
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */ TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */ TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
TidyMakeClean, /**< Replace presentational clutter by style rules */ TidyMakeClean, /**< Replace presentational clutter by style rules */
TidyGDocClean, /**< Clean up HTML exported from Google Docs */
TidyLogicalEmphasis, /**< Replace i by em and b by strong */ TidyLogicalEmphasis, /**< Replace i by em and b by strong */
TidyDropPropAttrs, /**< Discard proprietary attributes */ TidyDropPropAttrs, /**< Discard proprietary attributes */
TidyDropFontTags, /**< Discard presentation tags */ TidyDropFontTags, /**< Discard presentation tags */
TidyDropEmptyElems, /**< Discard empty elements */
TidyDropEmptyParas, /**< Discard empty p elements */ TidyDropEmptyParas, /**< Discard empty p elements */
TidyFixComments, /**< Fix comments with adjacent hyphens */ TidyFixComments, /**< Fix comments with adjacent hyphens */
TidyBreakBeforeBR, /**< Output newline before <br> or not? */ TidyBreakBeforeBR, /**< Output newline before <br> or not? */
@ -192,6 +197,7 @@ typedef enum
#else #else
TidyPunctWrapNotUsed, TidyPunctWrapNotUsed,
#endif #endif
TidyMergeEmphasis, /**< Merge nested B and I elements */
TidyMergeDivs, /**< Merge multiple DIVs */ TidyMergeDivs, /**< Merge multiple DIVs */
TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */ TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */
TidyPreserveEntities, /**< Preserve entities */ TidyPreserveEntities, /**< Preserve entities */
@ -234,6 +240,7 @@ typedef enum
*/ */
typedef enum typedef enum
{ {
TidyDoctypeHtml5, /**< <!DOCTYPE html> */
TidyDoctypeOmit, /**< Omit DOCTYPE altogether */ TidyDoctypeOmit, /**< Omit DOCTYPE altogether */
TidyDoctypeAuto, /**< Keep DOCTYPE in input. Set version to content */ TidyDoctypeAuto, /**< Keep DOCTYPE in input. Set version to content */
TidyDoctypeStrict, /**< Convert document to HTML 4 strict content model */ TidyDoctypeStrict, /**< Convert document to HTML 4 strict content model */
@ -436,16 +443,20 @@ typedef enum
TidyTag_ARTICLE, TidyTag_ARTICLE,
TidyTag_ASIDE, TidyTag_ASIDE,
TidyTag_AUDIO, TidyTag_AUDIO,
TidyTag_BDI,
TidyTag_CANVAS, TidyTag_CANVAS,
TidyTag_COMMAND, TidyTag_COMMAND,
TidyTag_DATALIST, TidyTag_DATALIST,
TidyTag_DETAILS, TidyTag_DETAILS,
TidyTag_DIALOG,
TidyTag_FIGCAPTION, TidyTag_FIGCAPTION,
TidyTag_FIGURE, TidyTag_FIGURE,
TidyTag_FOOTER, TidyTag_FOOTER,
TidyTag_HEADER, TidyTag_HEADER,
TidyTag_HGROUP, TidyTag_HGROUP,
TidyTag_MAIN,
TidyTag_MARK, TidyTag_MARK,
TidyTag_MENUITEM,
TidyTag_METER, TidyTag_METER,
TidyTag_NAV, TidyTag_NAV,
TidyTag_OUTPUT, TidyTag_OUTPUT,
@ -531,6 +542,7 @@ typedef enum
TidyAttr_HTTP_EQUIV, /**< HTTP_EQUIV= */ TidyAttr_HTTP_EQUIV, /**< HTTP_EQUIV= */
TidyAttr_ID, /**< ID= */ TidyAttr_ID, /**< ID= */
TidyAttr_ISMAP, /**< ISMAP= */ TidyAttr_ISMAP, /**< ISMAP= */
TidyAttr_ITEMPROP, /**< ITEMPROP= */
TidyAttr_LABEL, /**< LABEL= */ TidyAttr_LABEL, /**< LABEL= */
TidyAttr_LANG, /**< LANG= */ TidyAttr_LANG, /**< LANG= */
TidyAttr_LANGUAGE, /**< LANGUAGE= */ TidyAttr_LANGUAGE, /**< LANGUAGE= */

File diff suppressed because it is too large Load diff

View file

@ -125,14 +125,18 @@ extern const AttrVersion TY_(W3CAttrsFor_HGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[]; extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[];
extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[]; extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[];
extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[]; extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[];
extern const AttrVersion TY_(W3CAttrsFor_BDI)[];
extern const AttrVersion TY_(W3CAttrsFor_NAV)[]; extern const AttrVersion TY_(W3CAttrsFor_NAV)[];
extern const AttrVersion TY_(W3CAttrsFor_SECTION)[]; extern const AttrVersion TY_(W3CAttrsFor_SECTION)[];
extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[]; extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[];
extern const AttrVersion TY_(W3CAttrsFor_HEADER)[]; extern const AttrVersion TY_(W3CAttrsFor_HEADER)[];
extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[]; extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[];
extern const AttrVersion TY_(W3CAttrsFor_DIALOG)[];
extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[]; extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[];
extern const AttrVersion TY_(W3CAttrsFor_MAIN)[];
extern const AttrVersion TY_(W3CAttrsFor_MARK)[]; extern const AttrVersion TY_(W3CAttrsFor_MARK)[];
extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[]; extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[];
extern const AttrVersion TY_(W3CAttrsFor_MENUITEM)[];
extern const AttrVersion TY_(W3CAttrsFor_METER)[]; extern const AttrVersion TY_(W3CAttrsFor_METER)[];
extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[]; extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[];
extern const AttrVersion TY_(W3CAttrsFor_TIME)[]; extern const AttrVersion TY_(W3CAttrsFor_TIME)[];
@ -141,5 +145,8 @@ extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[]; extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[];
extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[]; extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[];
extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[]; extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[];
extern const AttrVersion TY_(W3CAttrsFor_EMBED)[];
extern const AttrVersion TY_(W3CAttrsFor_KEYGEN)[];
extern const AttrVersion TY_(W3CAttrsFor_WBR)[];
#endif /* __ATTRDICT_H__ */ #endif /* __ATTRDICT_H__ */

View file

@ -2,7 +2,7 @@
(c) 1998-2009 (W3C) MIT, ERCIM, Keio University (c) 1998-2009 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/ */
#include "tidy-int.h" #include "tidy-int.h"
@ -152,6 +152,7 @@ static const Attribute attribute_defs [] =
{ TidyAttr_HTTP_EQUIV, "http-equiv", CH_PCDATA }, /* META */ { TidyAttr_HTTP_EQUIV, "http-equiv", CH_PCDATA }, /* META */
{ TidyAttr_ID, "id", CH_IDDEF }, { TidyAttr_ID, "id", CH_IDDEF },
{ TidyAttr_ISMAP, "ismap", CH_BOOL }, /* IMG */ { TidyAttr_ISMAP, "ismap", CH_BOOL }, /* IMG */
{ TidyAttr_ITEMPROP, "itemprop", CH_PCDATA },
{ TidyAttr_LABEL, "label", CH_PCDATA }, /* OPT, OPTGROUP */ { TidyAttr_LABEL, "label", CH_PCDATA }, /* OPT, OPTGROUP */
{ TidyAttr_LANG, "lang", CH_LANG }, { TidyAttr_LANG, "lang", CH_LANG },
{ TidyAttr_LANGUAGE, "language", CH_PCDATA }, /* SCRIPT */ { TidyAttr_LANGUAGE, "language", CH_PCDATA }, /* SCRIPT */
@ -253,7 +254,7 @@ static const Attribute attribute_defs [] =
{ TidyAttr_SDASUFF, "sdasuff", CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ { TidyAttr_SDASUFF, "sdasuff", CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
{ TidyAttr_URN, "urn", CH_PCDATA }, /* for <a>, never implemented */ { TidyAttr_URN, "urn", CH_PCDATA }, /* for <a>, never implemented */
/* "HTML5" */ /* HTML5 */
{ TidyAttr_ASYNC, "async", CH_PCDATA }, { TidyAttr_ASYNC, "async", CH_PCDATA },
{ TidyAttr_AUTOCOMPLETE, "autocomplete", CH_PCDATA }, { TidyAttr_AUTOCOMPLETE, "autocomplete", CH_PCDATA },
{ TidyAttr_AUTOFOCUS, "autofocus", CH_PCDATA }, { TidyAttr_AUTOFOCUS, "autofocus", CH_PCDATA },
@ -362,7 +363,7 @@ static uint AttributeVersions(Node* node, AttVal* attval)
{ {
uint i; uint i;
/* "HTML5" data-* attributes */ /* HTML5 data-* attributes */
if (attval && attval->attribute) if (attval && attval->attribute)
if (TY_(tmbstrncmp)(attval->attribute, "data-", 5) == 0) if (TY_(tmbstrncmp)(attval->attribute, "data-", 5) == 0)
return (XH50 | HT50); return (XH50 | HT50);
@ -744,6 +745,27 @@ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
return attr; return attr;
} }
/* Remove from node's attribute list the first attribute whose name
** compares equal to `name` (via tmbstrcmp), and release it with
** FreeAttribute. Attributes that have no name are skipped. If nothing
** matches, the list is left untouched.
*/
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name )
{
    /* `link` always addresses the pointer that refers to the current
    ** attribute: the list head first, then each predecessor's `next`.
    */
    AttVal **link = &node->attributes;

    while ( *link != NULL )
    {
        AttVal *cur = *link;

        if ( cur->attribute && TY_(tmbstrcmp)(cur->attribute, name) == 0 )
        {
            /* splice the match out of the list before releasing it */
            *link = cur->next;
            TY_(FreeAttribute)( doc, cur );
            return;   /* only the first match is dropped */
        }

        link = &cur->next;
    }
}
AttVal* TY_(AddAttribute)( TidyDocImpl* doc, AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value ) Node *node, ctmbstr name, ctmbstr value )
{ {
@ -1360,11 +1382,8 @@ Bool TY_(IsValidHTMLID)(ctmbstr id)
if (!s) if (!s)
return no; return no;
if (!TY_(IsLetter)(*s++))
return no;
while (*s) while (*s)
if (!TY_(IsNamechar)(*s++)) if (TY_(IsHTMLSpace)(*s++))
return no; return no;
return yes; return yes;
@ -1807,9 +1826,11 @@ void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval)
/* checks type attribute */ /* checks type attribute */
void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval) void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval)
{ {
ctmbstr const valuesINPUT[] = {"text", "password", "checkbox", "radio", ctmbstr const valuesINPUT[] = {
"submit", "reset", "file", "hidden", "text", "password", "checkbox", "radio", "submit", "reset", "file",
"image", "button", NULL}; "hidden", "image", "button", "color", "date", "datetime",
"datetime-local", "email", "month", "number", "range", "search",
"tel", "time", "url", "week", NULL};
ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL}; ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL};
ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL}; ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL};
ctmbstr const valuesOL[] = {"1", "a", "i", NULL}; ctmbstr const valuesOL[] = {"1", "a", "i", NULL};

View file

@ -5,7 +5,7 @@
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/ */
#include "forward.h" #include "forward.h"
@ -81,6 +81,8 @@ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name ); AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
AttVal* TY_(AddAttribute)( TidyDocImpl* doc, AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value ); Node *node, ctmbstr name, ctmbstr value );
@ -217,6 +219,7 @@ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
#define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV ) #define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV )
#define attrIsID(av) AttrIsId( av, TidyAttr_ID ) #define attrIsID(av) AttrIsId( av, TidyAttr_ID )
#define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP ) #define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP )
#define attrIsITEMPROP(av) AttrIsId( av, TidyAttr_ITEMPROP )
#define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL ) #define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL )
#define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG ) #define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG )
#define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE ) #define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE )

View file

@ -4,9 +4,6 @@
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/
/*
config files associate a property name with a value. config files associate a property name with a value.
// comments can start at the beginning of a line // comments can start at the beginning of a line
@ -130,6 +127,7 @@ static const ctmbstr newlinePicks[] =
static const ctmbstr doctypePicks[] = static const ctmbstr doctypePicks[] =
{ {
"html5",
"omit", "omit",
"auto", "auto",
"strict", "strict",
@ -200,7 +198,7 @@ static ParseProperty ParseSorter;
static ParseProperty ParseCharEnc; static ParseProperty ParseCharEnc;
static ParseProperty ParseNewline; static ParseProperty ParseNewline;
/* omit | auto | strict | loose | <fpi> */ /* html5 | omit | auto | strict | loose | <fpi> */
static ParseProperty ParseDocType; static ParseProperty ParseDocType;
/* keep-first or keep-last? */ /* keep-first or keep-last? */
@ -213,9 +211,9 @@ static const TidyOptionImpl option_defs[] =
{ TidyIndentSpaces, PP, "indent-spaces", IN, 2, ParseInt, NULL }, { TidyIndentSpaces, PP, "indent-spaces", IN, 2, ParseInt, NULL },
{ TidyWrapLen, PP, "wrap", IN, 68, ParseInt, NULL }, { TidyWrapLen, PP, "wrap", IN, 68, ParseInt, NULL },
{ TidyTabSize, PP, "tab-size", IN, 8, ParseInt, NULL }, { TidyTabSize, PP, "tab-size", IN, 8, ParseInt, NULL },
{ TidyCharEncoding, CE, "char-encoding", IN, ASCII, ParseCharEnc, charEncPicks }, { TidyCharEncoding, CE, "char-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
{ TidyInCharEncoding, CE, "input-encoding", IN, LATIN1, ParseCharEnc, charEncPicks }, { TidyInCharEncoding, CE, "input-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
{ TidyOutCharEncoding, CE, "output-encoding", IN, ASCII, ParseCharEnc, charEncPicks }, { TidyOutCharEncoding, CE, "output-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
{ TidyNewline, CE, "newline", IN, DLF, ParseNewline, newlinePicks }, { TidyNewline, CE, "newline", IN, DLF, ParseNewline, newlinePicks },
{ TidyDoctypeMode, MU, "doctype-mode", IN, TidyDoctypeAuto, NULL, doctypePicks }, { TidyDoctypeMode, MU, "doctype-mode", IN, TidyDoctypeAuto, NULL, doctypePicks },
{ TidyDoctype, MU, "doctype", ST, 0, ParseDocType, doctypePicks }, { TidyDoctype, MU, "doctype", ST, 0, ParseDocType, doctypePicks },
@ -229,9 +227,12 @@ static const TidyOptionImpl option_defs[] =
{ TidyOutFile, MS, "output-file", ST, 0, ParseString, NULL }, { TidyOutFile, MS, "output-file", ST, 0, ParseString, NULL },
{ TidyWriteBack, MS, "write-back", BL, no, ParseBool, boolPicks }, { TidyWriteBack, MS, "write-back", BL, no, ParseBool, boolPicks },
{ TidyShowMarkup, PP, "markup", BL, yes, ParseBool, boolPicks }, { TidyShowMarkup, PP, "markup", BL, yes, ParseBool, boolPicks },
{ TidyShowInfo, DG, "show-info", BL, yes, ParseBool, boolPicks },
{ TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks }, { TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks },
{ TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks }, { TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks },
{ TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks }, { TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks },
{ TidyCoerceEndTags, MU, "coerce-endtags", BL, yes, ParseBool, boolPicks },
{ TidyOmitOptionalTags, MU, "omit-optional-tags", BL, no, ParseBool, boolPicks },
{ TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks }, { TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks },
{ TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks }, { TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks },
{ TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks }, { TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks },
@ -242,9 +243,11 @@ static const TidyOptionImpl option_defs[] =
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks }, { TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks }, { TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks }, { TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
{ TidyGDocClean, MU, "gdoc", BL, no, ParseBool, boolPicks },
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks }, { TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks }, { TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks }, { TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
{ TidyDropEmptyElems, MU, "drop-empty-elements", BL, yes, ParseBool, boolPicks },
{ TidyDropEmptyParas, MU, "drop-empty-paras", BL, yes, ParseBool, boolPicks }, { TidyDropEmptyParas, MU, "drop-empty-paras", BL, yes, ParseBool, boolPicks },
{ TidyFixComments, MU, "fix-bad-comments", BL, yes, ParseBool, boolPicks }, { TidyFixComments, MU, "fix-bad-comments", BL, yes, ParseBool, boolPicks },
{ TidyBreakBeforeBR, PP, "break-before-br", BL, no, ParseBool, boolPicks }, { TidyBreakBeforeBR, PP, "break-before-br", BL, no, ParseBool, boolPicks },
@ -303,6 +306,7 @@ static const TidyOptionImpl option_defs[] =
#if SUPPORT_ASIAN_ENCODINGS #if SUPPORT_ASIAN_ENCODINGS
{ TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks }, { TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks },
#endif #endif
{ TidyMergeEmphasis, MU, "merge-emphasis", BL, yes, ParseBool, boolPicks },
{ TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks }, { TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
{ TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks }, { TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks },
{ TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks }, { TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks },
@ -1425,7 +1429,7 @@ ctmbstr TY_(CharEncodingOptName)( int encoding )
} }
/* /*
doctype: omit | auto | strict | loose | <fpi> doctype: html5 | omit | auto | strict | loose | <fpi>
where the fpi is a string similar to where the fpi is a string similar to
@ -1462,6 +1466,8 @@ Bool ParseDocType( TidyDocImpl* doc, const TidyOptionImpl* option )
if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 ) if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 )
dtmode = TidyDoctypeAuto; dtmode = TidyDoctypeAuto;
else if ( TY_(tmbstrcasecmp)(buf, "html5") == 0 )
dtmode = TidyDoctypeHtml5;
else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 ) else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 )
dtmode = TidyDoctypeOmit; dtmode = TidyDoctypeOmit;
else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 ) else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 )

View file

@ -2,7 +2,7 @@
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/ */
/* /*
@ -39,6 +39,13 @@
#include "clean.h" #include "clean.h"
#include "utf8.h" #include "utf8.h"
#include "streamio.h" #include "streamio.h"
#ifdef _MSC_VER
#include "sprtf.h"
#endif
#ifndef SPRTF
#define SPRTF printf
#endif
/* Forward references /* Forward references
*/ */
@ -113,6 +120,9 @@ int TY_(HTMLVersion)(TidyDocImpl* doc)
!cfgBool(doc, TidyHtmlOut); !cfgBool(doc, TidyHtmlOut);
Bool html4 = dtmode == TidyDoctypeStrict || dtmode == TidyDoctypeLoose || VERS_FROM40 & dtver; Bool html4 = dtmode == TidyDoctypeStrict || dtmode == TidyDoctypeLoose || VERS_FROM40 & dtver;
if (xhtml && dtver == VERS_UNKNOWN) return XH50;
if (dtver == VERS_UNKNOWN) return HT50;
for (i = 0; W3C_Doctypes[i].name; ++i) for (i = 0; W3C_Doctypes[i].name; ++i)
{ {
if ((xhtml && !(VERS_XHTML & W3C_Doctypes[i].vers)) || if ((xhtml && !(VERS_XHTML & W3C_Doctypes[i].vers)) ||
@ -171,7 +181,7 @@ static uint GetVersFromFPI(ctmbstr fpi)
uint i; uint i;
for (i = 0; W3C_Doctypes[i].name; ++i) for (i = 0; W3C_Doctypes[i].name; ++i)
if (TY_(tmbstrcasecmp)(W3C_Doctypes[i].fpi, fpi) == 0) if (W3C_Doctypes[i].fpi != NULL && TY_(tmbstrcasecmp)(W3C_Doctypes[i].fpi, fpi) == 0)
return W3C_Doctypes[i].vers; return W3C_Doctypes[i].vers;
return 0; return 0;
@ -224,6 +234,11 @@ Bool TY_(IsLetter)(uint c)
return (map & letter)!=0; return (map & letter)!=0;
} }
/* Report whether c is one of five whitespace code points:
** TAB (0x09), LF (0x0a), FF (0x0c), CR (0x0d) or SPACE (0x20).
** Vertical tab (0x0b) is deliberately not in the set.
*/
Bool TY_(IsHTMLSpace)(uint c)
{
    /* 0x09..0x0d covers TAB, LF, FF and CR once 0x0b is excluded */
    return (c >= 0x009 && c <= 0x00d && c != 0x00b) || c == 0x020;
}
Bool TY_(IsNamechar)(uint c) Bool TY_(IsNamechar)(uint c)
{ {
uint map = MAP(c); uint map = MAP(c);
@ -1393,10 +1408,10 @@ Bool TY_(AddGenerator)( TidyDocImpl* doc )
if (head) if (head)
{ {
#ifdef PLATFORM_NAME #ifdef PLATFORM_NAME
TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for "PLATFORM_NAME" (vers %s), see www.w3.org", TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for HTML5 (experimental) for "PLATFORM_NAME" %s",
tidyReleaseDate()); tidyReleaseDate());
#else #else
TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy (vers %s), see www.w3.org", tidyReleaseDate()); TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for HTML5 (experimental) %s", tidyReleaseDate());
#endif #endif
for ( node = head->content; node; node = node->next ) for ( node = head->content; node; node = node->next )
@ -1562,6 +1577,12 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
switch(dtmode) switch(dtmode)
{ {
case TidyDoctypeHtml5:
/* HTML5 */
TY_(RepairAttrValue)(doc, doctype, pub, NULL);
TY_(RepairAttrValue)(doc, doctype, sys, NULL);
lexer->versionEmitted = XH50;
break;
case TidyDoctypeStrict: case TidyDoctypeStrict:
/* XHTML 1.0 Strict */ /* XHTML 1.0 Strict */
TY_(RepairAttrValue)(doc, doctype, pub, GetFPIFromVers(X10S)); TY_(RepairAttrValue)(doc, doctype, pub, GetFPIFromVers(X10S));
@ -1580,7 +1601,11 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
TY_(RepairAttrValue)(doc, doctype, sys, ""); TY_(RepairAttrValue)(doc, doctype, sys, "");
break; break;
case TidyDoctypeAuto: case TidyDoctypeAuto:
if (lexer->versions & XH11 && lexer->doctype == XH11) if (lexer->doctype == VERS_UNKNOWN) {
lexer->versionEmitted = XH50;
return yes;
}
else if (lexer->versions & XH11 && lexer->doctype == XH11)
{ {
if (!TY_(GetAttrByName)(doctype, sys)) if (!TY_(GetAttrByName)(doctype, sys))
TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(XH11)); TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(XH11));
@ -1618,10 +1643,6 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(X10T)); TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(X10T));
lexer->versionEmitted = X10T; lexer->versionEmitted = X10T;
} }
else if (lexer->versions & XH50)
{
lexer->versionEmitted = XH50;
}
else else
{ {
if (doctype) if (doctype)
@ -1678,6 +1699,9 @@ Bool TY_(FixDocType)( TidyDocImpl* doc )
switch (dtmode) switch (dtmode)
{ {
case TidyDoctypeHtml5:
guessed = HT50;
break;
case TidyDoctypeStrict: case TidyDoctypeStrict:
guessed = H41S; guessed = H41S;
break; break;
@ -2010,6 +2034,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode );
Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode ) Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode )
{ {
Node *node;
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
if (lexer->pushed || lexer->itoken) if (lexer->pushed || lexer->itoken)
@ -2030,33 +2055,61 @@ Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode )
/* duplicate inlines in preference to pushed text nodes when appropriate */ /* duplicate inlines in preference to pushed text nodes when appropriate */
lexer->pushed = no; lexer->pushed = no;
if (lexer->token->type != TextNode if (lexer->token->type != TextNode
|| !(lexer->insert || lexer->inode)) || !(lexer->insert || lexer->inode)) {
return lexer->token; node = lexer->token;
return lexer->itoken = TY_(InsertedToken)( doc ); #if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning pushed token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
}
/* A text token was pushed back while inline elements are still waiting to
   be re-opened: synthesize the inline token, remember it in lexer->itoken,
   and return THAT token. Returning lexer->token here would hand back the
   pushed text node instead of the inserted inline element. */
lexer->itoken = TY_(InsertedToken)( doc );
node = lexer->itoken;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning inserted token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
} }
assert( !(lexer->pushed || lexer->itoken) ); assert( !(lexer->pushed || lexer->itoken) );
/* at start of block elements, unclosed inline /* at start of block elements, unclosed inline
elements are inserted into the token stream */ elements are inserted into the token stream */
if (lexer->insert || lexer->inode) if (lexer->insert || lexer->inode) {
return lexer->token = TY_(InsertedToken)( doc ); lexer->token = TY_(InsertedToken)( doc );
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning Inserted token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
}
if (mode == CdataContent) if (mode == CdataContent)
{ {
assert( lexer->parent != NULL ); assert( lexer->parent != NULL );
return GetCDATA(doc, lexer->parent); node = GetCDATA(doc, lexer->parent);
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning Cdatacontent token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
} }
return GetTokenFromStream( doc, mode ); return GetTokenFromStream( doc, mode );
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
/* Debug-only helper: reports the given element name through SPRTF.
   The lexer calls it for selected start-tag names (presumably as a
   convenient spot for a breakpoint).
   name: element name to print; it is only read, so it is const-qualified. */
static void check_me(const char *name)
{
    SPRTF("Have node %s\n", name);
}
#endif
static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
{ {
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
uint c, badcomment = 0; uint c, badcomment = 0;
Bool isempty = no; Bool isempty = no;
AttVal *attributes = NULL; AttVal *attributes = NULL;
Node *node;
/* Lexer->token must be set on return. Nullify it for safety. */ /* Lexer->token must be set on return. Nullify it for safety. */
lexer->token = NULL; lexer->token = NULL;
@ -2170,7 +2223,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 3); StoreOriginalTextInToken(doc, lexer->token, 3);
#endif #endif
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning text token len %d...\n", node->end - node->start );
#endif
return node;
} }
continue; /* no text so keep going */ continue; /* no text so keep going */
@ -2397,7 +2454,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 0); /* hmm... */ StoreOriginalTextInToken(doc, lexer->token, 0); /* hmm... */
#endif #endif
return lexer->token; /* the endtag token */ node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning endtag token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the endtag token */
case LEX_STARTTAG: /* first letter of tagname */ case LEX_STARTTAG: /* first letter of tagname */
c = TY_(ReadChar)(doc->docIn); c = TY_(ReadChar)(doc->docIn);
@ -2471,7 +2532,19 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 0); StoreOriginalTextInToken(doc, lexer->token, 0);
#endif #endif
return lexer->token; /* return start tag */ node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning starttag token '%s'...\n", node->element ? node->element : "<blank>");
if (node->element) {
//if (stricmp(node->element,"datalist") == 0) {
// check_me(node->element);
//} else
if (stricmp(node->element,"option") == 0) {
check_me(node->element);
}
}
#endif
return node; /* return start tag */
case LEX_COMMENT: /* seen <!-- so look for --> */ case LEX_COMMENT: /* seen <!-- so look for --> */
@ -2509,7 +2582,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
else else
TY_(UngetChar)(c, doc->docIn); TY_(UngetChar)(c, doc->docIn);
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning comment token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
} }
/* note position of first such error in the comment */ /* note position of first such error in the comment */
@ -2554,7 +2631,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
/* make a note of the version named by the 1st doctype */ /* make a note of the version named by the 1st doctype */
if (lexer->doctype == VERS_UNKNOWN && lexer->token && !cfgBool(doc, TidyXmlTags)) if (lexer->doctype == VERS_UNKNOWN && lexer->token && !cfgBool(doc, TidyXmlTags))
lexer->doctype = FindGivenVersion(doc, lexer->token); lexer->doctype = FindGivenVersion(doc, lexer->token);
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning doctype token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
case LEX_PROCINSTR: /* seen <? so look for '>' */ case LEX_PROCINSTR: /* seen <? so look for '>' */
/* check for PHP preprocessor instructions <?php ... ?> */ /* check for PHP preprocessor instructions <?php ... ?> */
@ -2636,7 +2717,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning procinstr token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node;
case LEX_ASP: /* seen <% so look for "%>" */ case LEX_ASP: /* seen <% so look for "%>" */
if (c != '%') if (c != '%')
@ -2657,7 +2742,14 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = AspToken(doc); lexer->token = AspToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning ASP token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the ASP token */
case LEX_JSTE: /* seen <# so look for "#>" */ case LEX_JSTE: /* seen <# so look for "#>" */
if (c != '#') if (c != '#')
@ -2678,7 +2770,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = JsteToken(doc); lexer->token = JsteToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning JSTE token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the JSTE token */
case LEX_PHP: /* seen "<?php" so look for "?>" */ case LEX_PHP: /* seen "<?php" so look for "?>" */
if (c != '?') if (c != '?')
@ -2698,7 +2796,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = PhpToken(doc); lexer->token = PhpToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning PHP token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the PHP token */
case LEX_XMLDECL: /* seen "<?xml" so look for "?>" */ case LEX_XMLDECL: /* seen "<?xml" so look for "?>" */
@ -2728,7 +2831,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->waswhite = no; lexer->waswhite = no;
lexer->token = XmlDeclToken(doc); lexer->token = XmlDeclToken(doc);
lexer->token->attributes = attributes; lexer->token->attributes = attributes;
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning xml token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the xml token */
} }
av = TY_(NewAttribute)(doc); av = TY_(NewAttribute)(doc);
@ -2756,7 +2863,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->waswhite = no; lexer->waswhite = no;
lexer->token = XmlDeclToken(doc); lexer->token = XmlDeclToken(doc);
lexer->token->attributes = attributes; lexer->token->attributes = attributes;
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning XML token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the XML token */
case LEX_SECTION: /* seen "<![" so look for "]>" */ case LEX_SECTION: /* seen "<![" so look for "]>" */
if (c == '[') if (c == '[')
@ -2787,7 +2898,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = SectionToken(doc); lexer->token = SectionToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning SECTION token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the SECTION token */
case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */ case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */
if (c != ']') if (c != ']')
@ -2817,7 +2933,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = CDATAToken(doc); lexer->token = CDATAToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning CDATA token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the CDATA token */
} }
} }
@ -2838,7 +2959,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 0); /* ? */ StoreOriginalTextInToken(doc, lexer->token, 0); /* ? */
#endif #endif
return lexer->token; node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning textstring token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the textstring token */
} }
} }
else if (lexer->state == LEX_COMMENT) /* comment */ else if (lexer->state == LEX_COMMENT) /* comment */
@ -2850,9 +2975,17 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';
lexer->state = LEX_CONTENT; lexer->state = LEX_CONTENT;
lexer->waswhite = no; lexer->waswhite = no;
return lexer->token = CommentToken(doc); lexer->token = CommentToken(doc);
node = lexer->token;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning COMMENT token '%s'...\n", node->element ? node->element : "<blank>");
#endif
return node; /* the COMMENT token */
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Returning NULL...\n");
#endif
return NULL; return NULL;
} }

View file

@ -5,10 +5,7 @@
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/
/*
Given an input source, it returns a sequence of tokens. Given an input source, it returns a sequence of tokens.
GetToken(source) gets the next token GetToken(source) gets the next token
@ -189,7 +186,7 @@ typedef enum
/* special flag */ /* special flag */
#define VERS_XML 65536u #define VERS_XML 65536u
/* "HTML5" */ /* HTML5 */
#define HT50 131072u #define HT50 131072u
#define XH50 262144u #define XH50 262144u
@ -202,6 +199,8 @@ typedef enum
#define VERS_FRAMESET (H40F|H41F|X10F) #define VERS_FRAMESET (H40F|H41F|X10F)
#define VERS_XHTML11 (XH11) #define VERS_XHTML11 (XH11)
#define VERS_BASIC (XB10) #define VERS_BASIC (XB10)
/* HTML5 */
#define VERS_HTML5 (HT50|XH50)
/* meta symbols */ /* meta symbols */
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET) #define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET)
@ -411,6 +410,7 @@ void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
Bool TY_(IsWhite)(uint c); Bool TY_(IsWhite)(uint c);
Bool TY_(IsDigit)(uint c); Bool TY_(IsDigit)(uint c);
Bool TY_(IsLetter)(uint c); Bool TY_(IsLetter)(uint c);
Bool TY_(IsHTMLSpace)(uint c);
Bool TY_(IsNewline)(uint c); Bool TY_(IsNewline)(uint c);
Bool TY_(IsNamechar)(uint c); Bool TY_(IsNamechar)(uint c);
Bool TY_(IsXMLLetter)(uint c); Bool TY_(IsXMLLetter)(uint c);

View file

@ -6,7 +6,7 @@
You should only need to edit this file and tidy.c You should only need to edit this file and tidy.c
to localize HTML tidy. *** This needs checking *** to localize HTML tidy. *** This needs checking ***
*/ */
#include "tidy-int.h" #include "tidy-int.h"
@ -101,6 +101,8 @@ static struct _msgfmt
{ NESTED_QUOTATION, "nested q elements, possible typo." }, /* Warning */ { NESTED_QUOTATION, "nested q elements, possible typo." }, /* Warning */
{ OBSOLETE_ELEMENT, "replacing obsolete element %s by %s" }, /* Warning */ { OBSOLETE_ELEMENT, "replacing obsolete element %s by %s" }, /* Warning */
{ COERCE_TO_ENDTAG_WARN, "<%s> is probably intended as </%s>" }, /* Warning */ { COERCE_TO_ENDTAG_WARN, "<%s> is probably intended as </%s>" }, /* Warning */
/* HTML5 */
{ REMOVED_HTML5, "%s element removed from HTML5" }, /* Warning */
/* ReportNotice */ /* ReportNotice */
{ TRIM_EMPTY_ELEMENT, "trimming empty %s" }, /* Notice */ { TRIM_EMPTY_ELEMENT, "trimming empty %s" }, /* Notice */
@ -320,7 +322,7 @@ static const TidyOptionId TidyIndentContentLinks[] =
static const TidyOptionId TidyIndentSpacesLinks[] = static const TidyOptionId TidyIndentSpacesLinks[] =
{ TidyIndentContent, TidyUnknownOption }; { TidyIndentContent, TidyUnknownOption };
static const TidyOptionId TidyWrapAttValsLinks[] = static const TidyOptionId TidyWrapAttValsLinks[] =
{ TidyWrapScriptlets, TidyUnknownOption }; { TidyWrapScriptlets, TidyLiteralAttribs, TidyUnknownOption };
static const TidyOptionId TidyWrapScriptletsLinks[] = static const TidyOptionId TidyWrapScriptletsLinks[] =
{ TidyWrapAttVals, TidyUnknownOption }; { TidyWrapAttVals, TidyUnknownOption };
static const TidyOptionId TidyCharEncodingLinks[] = static const TidyOptionId TidyCharEncodingLinks[] =
@ -353,6 +355,8 @@ static const TidyOptionId TidyDropFontTagsLinks[] =
{ TidyMakeClean, TidyUnknownOption }; { TidyMakeClean, TidyUnknownOption };
static const TidyOptionId TidyMakeCleanTagsLinks[] = static const TidyOptionId TidyMakeCleanTagsLinks[] =
{ TidyDropFontTags, TidyUnknownOption }; { TidyDropFontTags, TidyUnknownOption };
/* Option-link table named for TidyGDocClean: it lists TidyMakeClean and is
   closed by TidyUnknownOption, like the neighbouring *Links tables.
   NOTE(review): presumably meant to be the related-options field of the
   TidyGDocClean entry in option_docs -- confirm. */
static const TidyOptionId TidyGDocCleanLinks[] = { TidyMakeClean, TidyUnknownOption };
/* Documentation of options */ /* Documentation of options */
static const TidyOptionDoc option_docs[] = static const TidyOptionDoc option_docs[] =
@ -399,14 +403,24 @@ static const TidyOptionDoc option_docs[] =
"on the HTML saved by Microsoft Office products. " "on the HTML saved by Microsoft Office products. "
, TidyMakeCleanTagsLinks , TidyMakeCleanTagsLinks
}, },
/* Documentation entry for the gdoc option. It references the link table
   declared for this option (TidyGDocCleanLinks) rather than
   TidyMakeCleanTagsLinks, which the preceding entry already uses. */
{TidyGDocClean,
 "This option specifies if Tidy "
 "should enable specific behavior for cleaning up HTML exported from "
 "Google Docs. "
 , TidyGDocCleanLinks
},
{TidyDoctype, {TidyDoctype,
"This option specifies the DOCTYPE declaration generated by Tidy. If set " "This option specifies the DOCTYPE declaration generated by Tidy.<br />"
"to \"omit\" the output won't contain a DOCTYPE declaration. If set to " "If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />"
"\"auto\" (the default) Tidy will use an educated guess based upon the " "If set to \"html5\" the DOCTYPE is set to \"&lt;!DOCTYPE html>\".<br />"
"contents of the document. If set to \"strict\", Tidy will set the DOCTYPE " "If set to \"auto\" (the default) Tidy will use an educated guess based "
"to the strict DTD. If set to \"loose\", the DOCTYPE is set to the loose " "upon the contents of the document.<br />"
"(transitional) DTD. Alternatively, you can supply a string for the formal " "If set to \"strict\", Tidy will set the DOCTYPE to the HTML4 or XHTML1 "
"public identifier (FPI).<br />" "strict DTD.<br />"
"If set to \"loose\", the DOCTYPE is set to the HTML4 or XHTML1 loose "
"(transitional) DTD. <br />"
"Alternatively, you can supply a string for the formal public identifier "
"(FPI).<br />"
"<br />" "<br />"
"For example: <br />" "For example: <br />"
"doctype: \"-//ACME//DTD HTML 3.14159//EN\"<br />" "doctype: \"-//ACME//DTD HTML 3.14159//EN\"<br />"
@ -419,6 +433,9 @@ static const TidyOptionDoc option_docs[] =
"<code>--numeric-entities yes</code>. This option does not offer a " "<code>--numeric-entities yes</code>. This option does not offer a "
"validation of the document conformance. " "validation of the document conformance. "
}, },
{TidyDropEmptyElems,
"This option specifies if Tidy should discard empty elements. "
},
{TidyDropEmptyParas, {TidyDropEmptyParas,
"This option specifies if Tidy should discard empty paragraphs. " "This option specifies if Tidy should discard empty paragraphs. "
}, },
@ -460,10 +477,22 @@ static const TidyOptionDoc option_docs[] =
{TidyHideComments, {TidyHideComments,
"This option specifies if Tidy should print out comments. " "This option specifies if Tidy should print out comments. "
}, },
{TidyCoerceEndTags,
"This option specifies if Tidy should coerce a start tag into an end tag "
"in cases where it looks like an end tag was probably intended; "
"for example, given &lt;span&gt;foo &lt;b&gt;bar&lt;b&gt; baz&lt;/span&gt;, "
"Tidy will output &lt;span&gt;foo &lt;b&gt;bar&lt;/b&gt; baz&lt;/span&gt;. "
},
{TidyOmitOptionalTags,
"This option specifies if Tidy should omit optional start tags and end tags "
"when generating output. Setting this option causes all tags for the "
"html, head, and body elements to be omitted from output, as well as such "
"end tags as &lt;/p&gt;, &lt;/li&gt;, &lt;/dt&gt;, &lt;/dd&gt;, "
"&lt;/option&gt;, &lt;/tr&gt;, &lt;/td&gt;, and &lt;/th&gt;. "
"This option is ignored for XML output. "
},
{TidyHideEndTags, {TidyHideEndTags,
"This option specifies if Tidy should omit optional end-tags when " "This option is an alias for omit-optional-tags. "
"generating the pretty printed markup. This option is ignored if you are "
"outputting to XML. "
}, },
{TidyIndentCdata, {TidyIndentCdata,
"This option specifies if Tidy should indent &lt;![CDATA[]]&gt; sections. " "This option specifies if Tidy should indent &lt;![CDATA[]]&gt; sections. "
@ -494,6 +523,12 @@ static const TidyOptionDoc option_docs[] =
"that takes a list of predefined values to lower case. This is required " "that takes a list of predefined values to lower case. This is required "
"for XHTML documents. " "for XHTML documents. "
}, },
{TidyMergeEmphasis,
"This option specifies if Tidy should merge nested &lt;b&gt; and &lt;i&gt; "
"elements; for example, for the case "
"&lt;b class=\"rtop-2\"&gt;foo &lt;b class=\"r2-2\"&gt;bar&lt;/b&gt; baz&lt;/b&gt;, "
"Tidy will output &lt;b class=\"rtop-2\"&gt;foo bar baz&lt;/b&gt;. "
},
{TidyMergeDivs, {TidyMergeDivs,
"Can be used to modify behavior of -c (--clean yes) option. " "Can be used to modify behavior of -c (--clean yes) option. "
"This option specifies if Tidy should merge nested &lt;div&gt; such as " "This option specifies if Tidy should merge nested &lt;div&gt; such as "
@ -644,6 +679,9 @@ static const TidyOptionDoc option_docs[] =
"This option specifies the number Tidy uses to determine if further errors " "This option specifies the number Tidy uses to determine if further errors "
"should be shown. If set to 0, then no errors are shown. " "should be shown. If set to 0, then no errors are shown. "
}, },
{TidyShowInfo,
"This option specifies if Tidy should display info-level messages. "
},
{TidyShowWarnings, {TidyShowWarnings,
"This option specifies if Tidy should suppress warnings. This can be " "This option specifies if Tidy should suppress warnings. This can be "
"useful when a few errors are hidden in a flurry of warnings. " "useful when a few errors are hidden in a flurry of warnings. "
@ -670,8 +708,14 @@ static const TidyOptionDoc option_docs[] =
,TidyIndentSpacesLinks ,TidyIndentSpacesLinks
}, },
{TidyLiteralAttribs, {TidyLiteralAttribs,
"This option specifies if Tidy should ensure that whitespace characters " "This option specifies how Tidy deals with whitespace characters within "
"within attribute values are passed through unchanged. " "attribute values. If the value is \"no\" (the default), Tidy \"munges\" "
"or \"normalizes\" attribute values by replacing any newline or tab "
"character with a single space character, and further by replacing "
"any sequences of multiple whitespace characters with a single space. "
"To force tidy to preserve the original, literal values of all attributes, "
"and ensure that whitespace characters within attribute values are passed "
"through unchanged, set this option to \"yes\". "
}, },
{TidyShowMarkup, {TidyShowMarkup,
"This option specifies if Tidy should generate a pretty printed version " "This option specifies if Tidy should generate a pretty printed version "
@ -706,9 +750,18 @@ static const TidyOptionDoc option_docs[] =
"pseudo elements, which look like: &lt;% ... %&gt;. " "pseudo elements, which look like: &lt;% ... %&gt;. "
}, },
{TidyWrapAttVals, {TidyWrapAttVals,
"This option specifies if Tidy should line wrap attribute values, for " "This option specifies if Tidy should line-wrap attribute values, for "
"easier editing. This option can be set independently of " "easier editing. Line wrapping means that if the value of an attribute "
"wrap-script-literals. " "causes a line to exceed the width specified by the \"wrap\" option, "
"tidy will add one or more line breaks to the value, causing it to "
"wrapped into multiple lines. Note that this option can be set "
"independently of wrap-script-literals. Also note that by default, Tidy "
"\"munges\" or \"normalizes\" attribute values by replacing any newline "
"or tab character with a single space character, and further by replacing "
"any sequences of multiple whitespace characters with a single space. "
"To force Tidy to preserve the original, literal values of all attributes, "
"and ensure that whitespace characters within attribute values are passed "
"through unchanged, set the literal-attributes option to \"yes\". "
,TidyWrapAttValsLinks ,TidyWrapAttValsLinks
}, },
{TidyWrapJste, {TidyWrapJste,
@ -1047,6 +1100,7 @@ __attribute__((format(printf, 2, 3)))
void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... ) void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... )
{ {
va_list args; va_list args;
if (level == TidyInfo && !cfgBool(doc, TidyShowInfo)) return;
va_start( args, msg ); va_start( args, msg );
messagePos( doc, level, 0, 0, msg, args ); messagePos( doc, level, 0, 0, msg, args );
va_end( args ); va_end( args );
@ -1367,14 +1421,14 @@ void TY_(ReportAccessWarning)( TidyDocImpl* doc, Node* node, uint code )
{ {
ctmbstr fmt = GetFormatFromCode(code); ctmbstr fmt = GetFormatFromCode(code);
doc->badAccess |= BA_WAI; doc->badAccess |= BA_WAI;
messageNode( doc, TidyAccess, node, fmt ); messageNode( doc, TidyAccess, node, "%s", fmt );
} }
void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code ) void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code )
{ {
ctmbstr fmt = GetFormatFromCode(code); ctmbstr fmt = GetFormatFromCode(code);
doc->badAccess |= BA_WAI; doc->badAccess |= BA_WAI;
messageNode( doc, TidyAccess, node, fmt ); messageNode( doc, TidyAccess, node, "%s", fmt );
} }
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */ #endif /* SUPPORT_ACCESSIBILITY_CHECKS */
@ -1393,7 +1447,7 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
switch (code) switch (code)
{ {
case NESTED_QUOTATION: case NESTED_QUOTATION:
messageNode(doc, TidyWarning, rpt, fmt); messageNode(doc, TidyWarning, rpt, "%s", fmt);
break; break;
case OBSOLETE_ELEMENT: case OBSOLETE_ELEMENT:
@ -1401,6 +1455,7 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc); messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc);
break; break;
case REMOVED_HTML5:
case NESTED_EMPHASIS: case NESTED_EMPHASIS:
messageNode(doc, TidyWarning, rpt, fmt, nodedesc); messageNode(doc, TidyWarning, rpt, fmt, nodedesc);
break; break;
@ -1474,7 +1529,7 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
case INCONSISTENT_NAMESPACE: case INCONSISTENT_NAMESPACE:
case DOCTYPE_AFTER_TAGS: case DOCTYPE_AFTER_TAGS:
case DTYPE_NOT_UPPER_CASE: case DTYPE_NOT_UPPER_CASE:
messageNode(doc, TidyWarning, rpt, fmt); messageNode(doc, TidyWarning, rpt, "%s", fmt);
break; break;
case COERCE_TO_ENDTAG: case COERCE_TO_ENDTAG:
@ -1493,7 +1548,7 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
case ENCODING_IO_CONFLICT: case ENCODING_IO_CONFLICT:
case MISSING_DOCTYPE: case MISSING_DOCTYPE:
case SPACE_PRECEDING_XMLDECL: case SPACE_PRECEDING_XMLDECL:
messageNode(doc, TidyWarning, node, fmt); messageNode(doc, TidyWarning, node, "%s", fmt);
break; break;
case TRIM_EMPTY_ELEMENT: case TRIM_EMPTY_ELEMENT:
@ -1542,7 +1597,7 @@ void TY_(ReportFatal)( TidyDocImpl* doc, Node *element, Node *node, uint code)
{ {
case SUSPECTED_MISSING_QUOTE: case SUSPECTED_MISSING_QUOTE:
case DUPLICATE_FRAMESET: case DUPLICATE_FRAMESET:
messageNode(doc, TidyError, rpt, fmt); messageNode(doc, TidyError, rpt, "%s", fmt);
break; break;
case UNKNOWN_ELEMENT: case UNKNOWN_ELEMENT:
@ -1775,11 +1830,14 @@ void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc )
void TY_(GeneralInfo)( TidyDocImpl* doc ) void TY_(GeneralInfo)( TidyDocImpl* doc )
{ {
tidy_out(doc, "To learn more about HTML Tidy see http://tidy.sourceforge.net\n"); if (!cfgBool(doc, TidyShowInfo)) return;
tidy_out(doc, "Please fill bug reports and queries using the \"tracker\" on the Tidy web site.\n"); tidy_out(doc, "About this fork of Tidy: http://w3c.github.com/tidy-html5/\n");
tidy_out(doc, "Additionally, questions can be sent to html-tidy@w3.org\n"); tidy_out(doc, "Bug reports and comments: https://github.com/w3c/tidy-html5/issues/\n");
tidy_out(doc, "HTML and CSS specifications are available from http://www.w3.org/\n"); tidy_out(doc, "Or send questions and comments to html-tidy@w3.org\n");
tidy_out(doc, "Lobby your company to join W3C, see http://www.w3.org/Consortium\n"); tidy_out(doc, "Latest HTML specification: http://dev.w3.org/html5/spec-author-view/\n");
tidy_out(doc, "HTML language reference: http://dev.w3.org/html5/markup/\n");
tidy_out(doc, "Validate your HTML5 documents: http://validator.w3.org/nu/\n");
tidy_out(doc, "Lobby your company to join the W3C: http://www.w3.org/Consortium\n");
} }
#if SUPPORT_ACCESSIBILITY_CHECKS #if SUPPORT_ACCESSIBILITY_CHECKS

View file

@ -5,7 +5,7 @@
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/ */
#include "forward.h" #include "forward.h"
@ -154,7 +154,9 @@ void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
#define MISSING_ATTRIBUTE 86 #define MISSING_ATTRIBUTE 86
#define WHITE_IN_URI 87 #define WHITE_IN_URI 87
#define PREVIOUS_LOCATION 88 /* last */ #define REMOVED_HTML5 88 /* this element removed from HTML5 */
#define PREVIOUS_LOCATION 89 /* last */
/* character encoding errors */ /* character encoding errors */

View file

@ -2,7 +2,7 @@
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/ */
#include "tidy-int.h" #include "tidy-int.h"
@ -12,6 +12,13 @@
#include "clean.h" #include "clean.h"
#include "tags.h" #include "tags.h"
#include "tmbstr.h" #include "tmbstr.h"
#ifdef _MSC_VER
#include "sprtf.h"
#endif
#ifndef SPRTF
#define SPRTF printf
#endif
#ifdef AUTO_INPUT_ENCODING #ifdef AUTO_INPUT_ENCODING
#include "charsets.h" #include "charsets.h"
@ -234,6 +241,9 @@ void TY_(InsertNodeAfterElement)(Node *element, Node *node)
static Bool CanPrune( TidyDocImpl* doc, Node *element ) static Bool CanPrune( TidyDocImpl* doc, Node *element )
{ {
if ( !cfgBool(doc, TidyDropEmptyElems) )
return no;
if ( TY_(nodeIsText)(element) ) if ( TY_(nodeIsText)(element) )
return yes; return yes;
@ -278,6 +288,13 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
if (nodeIsTEXTAREA(element)) if (nodeIsTEXTAREA(element))
return no; return no;
/* fix for ISSUE #7 https://github.com/w3c/tidy-html5/issues/7 */
if (nodeIsCANVAS(element))
return no;
if (nodeIsPROGRESS(element))
return no;
if ( attrGetID(element) || attrGetNAME(element) ) if ( attrGetID(element) || attrGetNAME(element) )
return no; return no;
@ -296,6 +313,10 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
if (nodeIsCOLGROUP(element)) if (nodeIsCOLGROUP(element))
return no; return no;
/* HTML5 - do NOT drop empty option if it has attributes */
if ( nodeIsOPTION(element) && element->attributes != NULL )
return no;
return yes; return yes;
} }
@ -811,13 +832,25 @@ static void AddClassNoIndent( TidyDocImpl* doc, Node *node )
*/ */
void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
{ {
#if !defined(NDEBUG) && defined(_MSC_VER)
static int in_parse_block = 0;
#endif
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
Node *node; Node *node;
Bool checkstack = yes; Bool checkstack = yes;
uint istackbase = 0; uint istackbase = 0;
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block++;
SPRTF("Entering ParseBlock %d...\n",in_parse_block);
#endif
if ( element->tag->model & CM_EMPTY ) if ( element->tag->model & CM_EMPTY ) {
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlockL 1 %d...\n",in_parse_block);
#endif
return; return;
}
if ( nodeIsFORM(element) && if ( nodeIsFORM(element) &&
DescendantOf(element, TidyTag_FORM) ) DescendantOf(element, TidyTag_FORM) )
@ -860,6 +893,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
element->closed = yes; element->closed = yes;
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
#endif
return; return;
} }
@ -951,6 +988,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
{ {
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
#endif
return; return;
} }
} }
@ -1111,6 +1152,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
{ {
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 3 %d...\n",in_parse_block);
#endif
return; return;
} }
} }
@ -1127,6 +1172,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
lexer->istackbase = istackbase; lexer->istackbase = istackbase;
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 4 %d...\n",in_parse_block);
#endif
return; return;
} }
} }
@ -1177,6 +1226,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
element->parent->tag->parser == TY_(ParseList) ) element->parent->tag->parser == TY_(ParseList) )
{ {
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 5 %d...\n",in_parse_block);
#endif
return; return;
} }
@ -1188,6 +1241,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
if ( nodeIsDL(element->parent) ) if ( nodeIsDL(element->parent) )
{ {
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 6 %d...\n",in_parse_block);
#endif
return; return;
} }
@ -1198,8 +1255,13 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
/* http://tidy.sf.net/issue/1316307 */ /* http://tidy.sf.net/issue/1316307 */
/* In exiled mode, return so table processing can /* In exiled mode, return so table processing can
continue. */ continue. */
if (lexer->exiled) if (lexer->exiled) {
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 7 %d...\n",in_parse_block);
#endif
return; return;
}
node = TY_(InferredTag)(doc, TidyTag_TABLE); node = TY_(InferredTag)(doc, TidyTag_TABLE);
} }
else if ( TY_(nodeHasCM)(element, CM_OBJECT) ) else if ( TY_(nodeHasCM)(element, CM_OBJECT) )
@ -1209,12 +1271,20 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
TY_(PopInline)( doc, NULL ); TY_(PopInline)( doc, NULL );
lexer->istackbase = istackbase; lexer->istackbase = istackbase;
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 8 %d...\n",in_parse_block);
#endif
return; return;
} }
else else
{ {
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 9 %d...\n",in_parse_block);
#endif
return; return;
} }
} }
@ -1278,15 +1348,31 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
} }
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_block--;
SPRTF("Exit ParseBlock 10 %d...\n",in_parse_block);
#endif
} }
void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
{ {
#if !defined(NDEBUG) && defined(_MSC_VER)
static int in_parse_inline = 0;
#endif
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
Node *node, *parent; Node *node, *parent;
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline++;
SPRTF("Entering ParseInline %d...\n",in_parse_inline);
#endif
if (element->tag->model & CM_EMPTY) if (element->tag->model & CM_EMPTY) {
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 1 %d...\n",in_parse_inline);
#endif
return; return;
}
/* /*
ParseInline is used for some block level elements like H1 to H6 ParseInline is used for some block level elements like H1 to H6
@ -1363,6 +1449,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
element->closed = yes; element->closed = yes;
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 2 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1384,6 +1474,7 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
&& !nodeIsSUP(node) && !nodeIsSUP(node)
&& !nodeIsQ(node) && !nodeIsQ(node)
&& !nodeIsSPAN(node) && !nodeIsSPAN(node)
&& cfgBool(doc, TidyCoerceEndTags)
) )
{ {
/* proceeds only if "node" does not have any attribute and /* proceeds only if "node" does not have any attribute and
@ -1442,7 +1533,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 3 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1529,6 +1623,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
TY_(InlineDup1)( doc, NULL, element ); /* dupe the <i>, after </b> */ TY_(InlineDup1)( doc, NULL, element ); /* dupe the <i>, after </b> */
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces( doc, element ); TrimSpaces( doc, element );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 4 %d...\n",in_parse_inline);
#endif
return; /* close <i>, but will re-open it, after </b> */ return; /* close <i>, but will re-open it, after </b> */
} }
} }
@ -1549,7 +1647,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 5 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1563,6 +1664,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
{ {
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 6 %d...\n",in_parse_inline);
#endif
return; return;
} }
} }
@ -1585,6 +1690,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 7 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1601,7 +1710,8 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
/* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */ /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */
/* other fixes by Dave Raggett */ /* other fixes by Dave Raggett */
/* if (node->attributes == NULL) */ /* if (node->attributes == NULL) */
if (node->type != EndTag && node->attributes == NULL) if (node->type != EndTag && node->attributes == NULL
&& cfgBool(doc, TidyCoerceEndTags) )
{ {
node->type = EndTag; node->type = EndTag;
TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG); TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG);
@ -1617,6 +1727,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 8 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1739,6 +1853,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 9 %d...\n",in_parse_inline);
#endif
return; return;
} }
} }
@ -1754,7 +1872,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
TY_(FreeNode)( doc, node); TY_(FreeNode)( doc, node);
continue; continue;
} }
/* HTML5 */
if (nodeIsDATALIST(element)) {
TY_(ConstrainVersion)( doc, ~VERS_HTML5 );
} else
if (!(element->tag->model & CM_OPT)) if (!(element->tag->model & CM_OPT))
TY_(ReportError)(doc, element, node, MISSING_ENDTAG_BEFORE); TY_(ReportError)(doc, element, node, MISSING_ENDTAG_BEFORE);
@ -1776,6 +1897,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
{ {
TY_(DiscardElement)( doc, element ); TY_(DiscardElement)( doc, element );
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 10 %d...\n",in_parse_inline);
#endif
return; return;
} }
} }
@ -1785,6 +1910,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(mode & Preformatted)) if (!(mode & Preformatted))
TrimSpaces(doc, element); TrimSpaces(doc, element);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 11 %d...\n",in_parse_inline);
#endif
return; return;
} }
@ -1812,6 +1941,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if (!(element->tag->model & CM_OPT)) if (!(element->tag->model & CM_OPT))
TY_(ReportError)(doc, element, node, MISSING_ENDTAG_FOR); TY_(ReportError)(doc, element, node, MISSING_ENDTAG_FOR);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_inline--;
SPRTF("Exit ParseInline 12 %d...\n",in_parse_inline);
#endif
} }
void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode) void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode)
@ -1824,7 +1957,7 @@ void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode)
{ {
if ( !(node->type == EndTag && node->tag == element->tag) ) if ( !(node->type == EndTag && node->tag == element->tag) )
{ {
TY_(ReportError)(doc, element, node, ELEMENT_NOT_EMPTY); /* TY_(ReportError)(doc, element, node, ELEMENT_NOT_EMPTY); */
TY_(UngetToken)( doc ); TY_(UngetToken)( doc );
} }
else else
@ -2895,10 +3028,17 @@ void TY_(ParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode)) void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
{ {
#if !defined(NDEBUG) && defined(_MSC_VER)
static int in_parse_select = 0;
#endif
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
Node *node; Node *node;
lexer->insert = NULL; /* defer implicit inline start tags */ lexer->insert = NULL; /* defer implicit inline start tags */
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_select++;
SPRTF("Entering ParseSelect %d...\n",in_parse_select);
#endif
while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
{ {
@ -2907,6 +3047,10 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
TY_(FreeNode)( doc, node); TY_(FreeNode)( doc, node);
field->closed = yes; field->closed = yes;
TrimSpaces(doc, field); TrimSpaces(doc, field);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_select--;
SPRTF("Exit ParseSelect 1 %d...\n",in_parse_select);
#endif
return; return;
} }
@ -2917,6 +3061,7 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
if ( node->type == StartTag && if ( node->type == StartTag &&
( nodeIsOPTION(node) || ( nodeIsOPTION(node) ||
nodeIsOPTGROUP(node) || nodeIsOPTGROUP(node) ||
nodeIsDATALIST(node) ||
nodeIsSCRIPT(node)) nodeIsSCRIPT(node))
) )
{ {
@ -2931,8 +3076,72 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
} }
TY_(ReportError)(doc, field, node, MISSING_ENDTAG_FOR); TY_(ReportError)(doc, field, node, MISSING_ENDTAG_FOR);
#if !defined(NDEBUG) && defined(_MSC_VER)
in_parse_select--;
SPRTF("Exit ParseSelect 2 %d...\n",in_parse_select);
#endif
} }
/* HTML5: parse the content of a <datalist> element.
**
** Tokens are consumed until the end tag matching 'field' is found.
** <option>, <optgroup>, <datalist> and <script> start tags are appended
** to 'field' and parsed in turn; any other token that InsertMisc() does
** not take is reported as DISCARDING_UNEXPECTED and freed.  Running out
** of tokens before the end tag is reported as MISSING_ENDTAG_FOR.
** The 'mode' argument is unused.
*/
void TY_(ParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
{
#if !defined(NDEBUG) && defined(_MSC_VER)
    static int in_parse_datalist = 0;   /* nesting depth, debug trace only */
#endif
    Node *tok;

    doc->lexer->insert = NULL;  /* defer implicit inline start tags */

#if !defined(NDEBUG) && defined(_MSC_VER)
    in_parse_datalist++;
    SPRTF("Entering ParseDatalist %d...\n",in_parse_datalist);
#endif

    for (;;)
    {
        tok = TY_(GetToken)(doc, IgnoreWhitespace);
        if (tok == NULL)
            break;

        /* the matching end tag closes the element and ends parsing */
        if (tok->type == EndTag && tok->tag == field->tag)
        {
            TY_(FreeNode)( doc, tok);
            field->closed = yes;
            TrimSpaces(doc, field);
#if !defined(NDEBUG) && defined(_MSC_VER)
            in_parse_datalist--;
            SPRTF("Exit ParseDatalist 1 %d...\n",in_parse_datalist);
#endif
            return;
        }

        /* deal with comments etc. */
        if (InsertMisc(field, tok))
            continue;

        if ( tok->type != StartTag ||
             !( nodeIsOPTION(tok)   ||
                nodeIsOPTGROUP(tok) ||
                nodeIsDATALIST(tok) ||
                nodeIsSCRIPT(tok) ) )
        {
            /* discard unexpected tags */
            TY_(ReportError)(doc, field, tok, DISCARDING_UNEXPECTED);
            TY_(FreeNode)( doc, tok);
            continue;
        }

        /* accepted child: attach it, then parse its own content */
        TY_(InsertNodeAtEnd)(field, tok);
        ParseTag(doc, tok, IgnoreWhitespace);
    }

    /* token stream exhausted without an end tag; tok is NULL here */
    TY_(ReportError)(doc, field, tok, MISSING_ENDTAG_FOR);
#if !defined(NDEBUG) && defined(_MSC_VER)
    in_parse_datalist--;
    SPRTF("Exit ParseDatalist 2 %d...\n",in_parse_datalist);
#endif
}
void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode) void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
{ {
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
@ -3006,7 +3215,8 @@ void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode
Node *node; Node *node;
while ((node = TY_(GetToken)(doc, MixedContent)) != NULL) while ((node = TY_(GetToken)(doc, MixedContent)) != NULL)
{ {
if (node->tag == title->tag && node->type == StartTag) if (node->tag == title->tag && node->type == StartTag
&& cfgBool(doc, TidyCoerceEndTags) )
{ {
TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG); TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG);
node->type = EndTag; node->type = EndTag;
@ -3129,6 +3339,9 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
int HasTitle = 0; int HasTitle = 0;
int HasBase = 0; int HasBase = 0;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Enter ParseHead...\n");
#endif
while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
{ {
if (node->tag == head->tag && node->type == EndTag) if (node->tag == head->tag && node->type == EndTag)
@ -3214,10 +3427,6 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
head ? head ?
TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS); TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
} }
else if ( nodeIsNOSCRIPT(node) )
{
TY_(ReportError)(doc, head, node, TAG_NOT_ALLOWED_IN);
}
#ifdef AUTO_INPUT_ENCODING #ifdef AUTO_INPUT_ENCODING
else if (nodeIsMETA(node)) else if (nodeIsMETA(node))
@ -3271,6 +3480,9 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
TY_(ReportError)(doc, head, node, DISCARDING_UNEXPECTED); TY_(ReportError)(doc, head, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)( doc, node); TY_(FreeNode)( doc, node);
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Exit ParseHead 1...\n");
#endif
} }
void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode) void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
@ -3283,6 +3495,9 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
checkstack = yes; checkstack = yes;
TY_(BumpObject)( doc, body->parent ); TY_(BumpObject)( doc, body->parent );
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Enter ParseBody...\n");
#endif
while ((node = TY_(GetToken)(doc, mode)) != NULL) while ((node = TY_(GetToken)(doc, mode)) != NULL)
{ {
@ -3510,7 +3725,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
if (TY_(nodeIsElement)(node)) if (TY_(nodeIsElement)(node))
{ {
if ( TY_(nodeHasCM)(node, CM_INLINE) && !TY_(nodeHasCM)(node, CM_MIXED) ) if ( TY_(nodeHasCM)(node, CM_INLINE) )
{ {
/* HTML4 strict doesn't allow inline content here */ /* HTML4 strict doesn't allow inline content here */
/* but HTML2 does allow img elements as children of body */ /* but HTML2 does allow img elements as children of body */
@ -3547,6 +3762,9 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
TY_(ReportError)(doc, body, node, DISCARDING_UNEXPECTED); TY_(ReportError)(doc, body, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)( doc, node); TY_(FreeNode)( doc, node);
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Exit ParseBody 1...\n");
#endif
} }
void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode) void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
@ -3735,6 +3953,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
Node *frameset = NULL; Node *frameset = NULL;
Node *noframes = NULL; Node *noframes = NULL;
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Entering ParseHTML...\n");
#endif
TY_(SetOptionBool)( doc, TidyXmlTags, no ); TY_(SetOptionBool)( doc, TidyXmlTags, no );
for (;;) for (;;)
@ -3790,7 +4011,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
TY_(InsertNodeAtEnd)(html, node); TY_(InsertNodeAtEnd)(html, node);
TY_(ParseBody)(doc, node, mode); TY_(ParseBody)(doc, node, mode);
} }
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Exit ParseHTML 1...\n");
#endif
return; return;
} }
@ -3956,6 +4179,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
TY_(InsertNodeAtEnd)(html, node); TY_(InsertNodeAtEnd)(html, node);
ParseTag(doc, node, mode); ParseTag(doc, node, mode);
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("Exit ParseHTML 2...\n");
#endif
} }
static Bool nodeCMIsOnlyInline( Node* node ) static Bool nodeCMIsOnlyInline( Node* node )
@ -4048,7 +4274,9 @@ static void ReplaceObsoleteElements(TidyDocImpl* doc, Node* node)
{ {
next = node->next; next = node->next;
if (nodeIsDIR(node) || nodeIsMENU(node)) /* if (nodeIsDIR(node) || nodeIsMENU(node)) */
/* HTML5 - <menu ... > is no longer obsolete */
if (nodeIsDIR(node))
TY_(CoerceNode)(doc, node, TidyTag_UL, yes, yes); TY_(CoerceNode)(doc, node, TidyTag_UL, yes, yes);
if (nodeIsXMP(node) || nodeIsLISTING(node) || if (nodeIsXMP(node) || nodeIsLISTING(node) ||

View file

@ -3,7 +3,7 @@
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
*/ */
#include <stdio.h> #include <stdio.h>
@ -1152,7 +1152,7 @@ static void PPrintAttribute( TidyDocImpl* doc, uint indent,
{ {
if ( TY_(IsScript)(doc, name) ) if ( TY_(IsScript)(doc, name) )
wrappable = cfgBool( doc, TidyWrapScriptlets ); wrappable = cfgBool( doc, TidyWrapScriptlets );
else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr)) && wrapAttrs ) else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr) || attrIsTITLE(attr)) && wrapAttrs )
wrappable = yes; wrappable = yes;
} }
@ -2083,7 +2083,8 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
{ {
Bool indcont = ( cfgAutoBool(doc, TidyIndentContent) != TidyNoState ); Bool indcont = ( cfgAutoBool(doc, TidyIndentContent) != TidyNoState );
Bool indsmart = ( cfgAutoBool(doc, TidyIndentContent) == TidyAutoState ); Bool indsmart = ( cfgAutoBool(doc, TidyIndentContent) == TidyAutoState );
Bool hideend = cfgBool( doc, TidyHideEndTags ); Bool hideend = cfgBool( doc, TidyHideEndTags ) ||
cfgBool( doc, TidyOmitOptionalTags );
Bool classic = cfgBool( doc, TidyVertSpace ); Bool classic = cfgBool( doc, TidyVertSpace );
uint contentIndent = indent; uint contentIndent = indent;

View file

@ -269,6 +269,11 @@ Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod )
{ return nodeIsMENU( tidyNodeToImpl(tnod) ); { return nodeIsMENU( tidyNodeToImpl(tnod) );
} }
/* HTML5: report whether the given node is a <datalist> element. */
Bool TIDY_CALL tidyNodeIsDATALIST( TidyNode tnod )
{
    Node* impl = tidyNodeToImpl( tnod );
    return nodeIsDATALIST( impl );
}
/* /*
* local variables: * local variables:

1902
src/tags.c

File diff suppressed because it is too large Load diff

View file

@ -110,6 +110,7 @@ Parser TY_(ParseRow);
Parser TY_(ParseSelect); Parser TY_(ParseSelect);
Parser TY_(ParseOptGroup); Parser TY_(ParseOptGroup);
Parser TY_(ParseText); Parser TY_(ParseText);
Parser TY_(ParseDatalist);
CheckAttribs TY_(CheckAttributes); CheckAttribs TY_(CheckAttributes);
@ -224,6 +225,11 @@ uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */
#define nodeIsU( node ) TagIsId( node, TidyTag_U ) #define nodeIsU( node ) TagIsId( node, TidyTag_U )
#define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU ) #define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU )
#define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON ) #define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON )
#define nodeIsCANVAS( node ) TagIsId( node, TidyTag_CANVAS )
#define nodeIsPROGRESS( node ) TagIsId( node, TidyTag_PROGRESS )
/* HTML5 */
#define nodeIsDATALIST( node ) TagIsId( node, TidyTag_DATALIST )
#endif /* __TAGS_H__ */ #endif /* __TAGS_H__ */

View file

@ -4,14 +4,14 @@
See tidy.h for the copyright notice. See tidy.h for the copyright notice.
Defines HTML Tidy API implemented by tidy library. Defines HTML Tidy API implemented by tidy library.
Very rough initial cut for discussion purposes. Very rough initial cut for discussion purposes.
Public interface is const-correct and doesn't explicitly depend Public interface is const-correct and doesn't explicitly depend
on any globals. Thus, thread-safety may be introduced w/out on any globals. Thus, thread-safety may be introduced w/out
changing the interface. changing the interface.
Looking ahead to a C++ wrapper, C functions always pass Looking ahead to a C++ wrapper, C functions always pass
this-equivalent as 1st arg. this-equivalent as 1st arg.
Created 2001-05-20 by Charles Reitzel Created 2001-05-20 by Charles Reitzel
@ -23,6 +23,7 @@
#include "tidy-int.h" #include "tidy-int.h"
#include "parser.h" #include "parser.h"
#include "clean.h" #include "clean.h"
#include "gdoc.h"
#include "config.h" #include "config.h"
#include "message.h" #include "message.h"
#include "pprint.h" #include "pprint.h"
@ -111,7 +112,7 @@ TidyOption tidyImplToOption( const TidyOptionImpl* option )
** 0 -> SUCCESS ** 0 -> SUCCESS
** >0 -> WARNING ** >0 -> WARNING
** <0 -> ERROR ** <0 -> ERROR
** **
*/ */
TidyDoc TIDY_CALL tidyCreate(void) TidyDoc TIDY_CALL tidyCreate(void)
@ -622,8 +623,8 @@ Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
/* I/O and Message handling interface /* I/O and Message handling interface
** **
** By default, Tidy will define, create and use ** By default, Tidy will define, create and use
** instances of input and output handlers for ** instances of input and output handlers for
** standard C buffered I/O (i.e. FILE* stdin, ** standard C buffered I/O (i.e. FILE* stdin,
** FILE* stdout and FILE* stderr for content ** FILE* stdout and FILE* stderr for content
** input, content output and diagnostic output, ** input, content output and diagnostic output,
@ -633,7 +634,7 @@ Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
*/ */
/* Use TidyReportFilter to filter messages by diagnostic level: /* Use TidyReportFilter to filter messages by diagnostic level:
** info, warning, etc. Just set diagnostic output ** info, warning, etc. Just set diagnostic output
** handler to redirect all diagnostics output. Return true ** handler to redirect all diagnostics output. Return true
** to proceed with output, false to cancel. ** to proceed with output, false to cancel.
*/ */
@ -792,7 +793,7 @@ uint TIDY_CALL tidyConfigErrorCount( TidyDoc tdoc )
} }
/* Error reporting functions /* Error reporting functions
*/ */
void TIDY_CALL tidyErrorSummary( TidyDoc tdoc ) void TIDY_CALL tidyErrorSummary( TidyDoc tdoc )
{ {
@ -968,7 +969,7 @@ int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
if ( doc->errors > 0 && if ( doc->errors > 0 &&
cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) ) cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) )
status = tidyDocStatus( doc ); status = tidyDocStatus( doc );
else else
fout = fopen( filnam, "wb" ); fout = fopen( filnam, "wb" );
if ( fout ) if ( fout )
@ -1002,7 +1003,7 @@ int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
** The code has been left in in case it works w/ other compilers ** The code has been left in in case it works w/ other compilers
** or operating systems. If stdout is in Text mode, be aware that ** or operating systems. If stdout is in Text mode, be aware that
** it will garble UTF16 documents. In text mode, when it encounters ** it will garble UTF16 documents. In text mode, when it encounters
** a single byte of value 10 (0xA), it will insert a single byte ** a single byte of value 10 (0xA), it will insert a single byte
** value 13 (0xD) just before it. This has the effect of garbling ** value 13 (0xD) just before it. This has the effect of garbling
** the entire document. ** the entire document.
*/ */
@ -1067,7 +1068,7 @@ int tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
TidyBuffer outbuf; TidyBuffer outbuf;
StreamOut* out; StreamOut* out;
int status; int status;
tidyBufInitWithAllocator( &outbuf, doc->allocator ); tidyBufInitWithAllocator( &outbuf, doc->allocator );
out = TY_(BufferOutput)( doc, &outbuf, outenc, nl ); out = TY_(BufferOutput)( doc, &outbuf, outenc, nl );
status = tidyDocSaveStream( doc, out ); status = tidyDocSaveStream( doc, out );
@ -1091,7 +1092,7 @@ int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
uint outenc = cfg( doc, TidyOutCharEncoding ); uint outenc = cfg( doc, TidyOutCharEncoding );
uint nl = cfg( doc, TidyNewline ); uint nl = cfg( doc, TidyNewline );
StreamOut* out = TY_(BufferOutput)( doc, outbuf, outenc, nl ); StreamOut* out = TY_(BufferOutput)( doc, outbuf, outenc, nl );
status = tidyDocSaveStream( doc, out ); status = tidyDocSaveStream( doc, out );
TidyDocFree( doc, out ); TidyDocFree( doc, out );
} }
@ -1138,7 +1139,7 @@ int TIDY_CALL tidyRunDiagnostics( TidyDoc tdoc )
/* Workhorse functions. /* Workhorse functions.
** **
** Parse requires input source, all input config items ** Parse requires input source, all input config items
** and diagnostic sink to have all been set before calling. ** and diagnostic sink to have all been set before calling.
** **
** Emit likewise requires that document sink and all ** Emit likewise requires that document sink and all
@ -1220,18 +1221,70 @@ int tidyDocRunDiagnostics( TidyDocImpl* doc )
TY_(ReportMarkupVersion)( doc ); TY_(ReportMarkupVersion)( doc );
TY_(ReportNumWarnings)( doc ); TY_(ReportNumWarnings)( doc );
} }
if ( doc->errors > 0 && !force ) if ( doc->errors > 0 && !force )
TY_(NeedsAuthorIntervention)( doc ); TY_(NeedsAuthorIntervention)( doc );
return tidyDocStatus( doc ); return tidyDocStatus( doc );
} }
/* Elements that CheckHTML5 reports with REMOVED_HTML5 whatever their
** 'versions' flags say; inRemovedInfo() looks them up by id.
** The table is terminated by a { 0, 0 } entry, and the lookup code uses
** the 'tag' field only to detect that terminator.
*/
static struct _html5Info
{
const char *tag;   /* element name; 0 marks the end of the table */
uint id;           /* tag id matched against the id passed to inRemovedInfo() */
} const html5Info[] = {
{"acronym", TidyTag_ACRONYM},
{"applet", TidyTag_APPLET },
{"basefont",TidyTag_BASEFONT },
{ "big", TidyTag_BIG },
{ "center", TidyTag_CENTER },
{ "dir", TidyTag_DIR },
{ "font", TidyTag_FONT },
{ "frame", TidyTag_FRAME},
{ "frameset", TidyTag_FRAMESET},
{ "noframes", TidyTag_NOFRAMES },
{ "strike", TidyTag_STRIKE },
{ "tt", TidyTag_TT },
{ 0, 0 }   /* end-of-table sentinel */
};
/* Return yes when 'tid' is the id of one of the entries in html5Info,
** no otherwise.  The table is scanned up to its null-tag terminator.
*/
Bool inRemovedInfo( uint tid )
{
    const struct _html5Info *entry;

    for (entry = html5Info; entry->tag != 0; ++entry)
    {
        if (entry->id == tid)
            return yes;
    }
    return no;
}
/* Walk 'node', its following siblings and, recursively, their content,
** and issue a REMOVED_HTML5 warning for every element with a known tag
** that either lacks the VERS_HTML5 bit in its 'versions' mask or is
** listed in html5Info (see inRemovedInfo).
**
** doc  - document the warnings are reported against
** node - first node of the sibling chain to check; may be NULL
*/
void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
{
    while (node)
    {
        if ( TY_(nodeIsElement)(node) && node->tag )
        {
            /* The parentheses are essential: unary '!' binds tighter than
            ** binary '&', so "!versions & VERS_HTML5" would test
            ** "(!versions) & VERS_HTML5" and never detect a missing bit.
            */
            if ( !(node->tag->versions & VERS_HTML5) ||
                 inRemovedInfo(node->tag->id) )
            {
                /* issue warning */
                TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
            }
        }

        /* descend into children before moving on to the next sibling */
        if (node->content)
            TY_(CheckHTML5)( doc, node->content );

        node = node->next;
    }
}
int tidyDocCleanAndRepair( TidyDocImpl* doc ) int tidyDocCleanAndRepair( TidyDocImpl* doc )
{ {
Bool word2K = cfgBool( doc, TidyWord2000 ); Bool word2K = cfgBool( doc, TidyWord2000 );
Bool logical = cfgBool( doc, TidyLogicalEmphasis ); Bool logical = cfgBool( doc, TidyLogicalEmphasis );
Bool clean = cfgBool( doc, TidyMakeClean ); Bool clean = cfgBool( doc, TidyMakeClean );
Bool gdoc = cfgBool( doc, TidyGDocClean );
Bool dropFont = cfgBool( doc, TidyDropFontTags ); Bool dropFont = cfgBool( doc, TidyDropFontTags );
Bool htmlOut = cfgBool( doc, TidyHtmlOut ); Bool htmlOut = cfgBool( doc, TidyHtmlOut );
Bool xmlOut = cfgBool( doc, TidyXmlOut ); Bool xmlOut = cfgBool( doc, TidyXmlOut );
@ -1240,13 +1293,16 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool tidyMark = cfgBool( doc, TidyMark ); Bool tidyMark = cfgBool( doc, TidyMark );
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags ); Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName ); Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
ctmbstr sdef = NULL;
Node* node; Node* node;
if (tidyXmlTags) if (tidyXmlTags)
return tidyDocStatus( doc ); return tidyDocStatus( doc );
/* simplifies <b><b> ... </b> ...</b> etc. */ /* simplifies <b><b> ... </b> ...</b> etc. */
TY_(NestedEmphasis)( doc, &doc->root ); if ( mergeEmphasis )
TY_(NestedEmphasis)( doc, &doc->root );
/* cleans up <dir>indented text</dir> etc. */ /* cleans up <dir>indented text</dir> etc. */
TY_(List2BQ)( doc, &doc->root ); TY_(List2BQ)( doc, &doc->root );
@ -1270,6 +1326,10 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if ( clean || dropFont ) if ( clean || dropFont )
TY_(CleanDocument)( doc ); TY_(CleanDocument)( doc );
/* clean up html exported by Google Docs */
if ( gdoc )
TY_(CleanGoogleDocument)( doc );
/* Move terminating <br /> tags from out of paragraphs */ /* Move terminating <br /> tags from out of paragraphs */
/*! Do we want to do this for all block-level elements? */ /*! Do we want to do this for all block-level elements? */
@ -1291,6 +1351,12 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
/* remember given doctype for reporting */ /* remember given doctype for reporting */
node = TY_(FindDocType)(doc); node = TY_(FindDocType)(doc);
sdef = tidyOptGetValue((TidyDoc)doc, TidyDoctype );
if (!sdef)
sdef = tidyOptGetCurrPick((TidyDoc) doc, TidyDoctypeMode );
if (sdef && (strcmp(sdef,"html5") == 0)) {
TY_(CheckHTML5)( doc, &doc->root );
}
if (node) if (node)
{ {
AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC"); AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
@ -1388,7 +1454,6 @@ int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
{ {
/* noop */ /* noop */
TY_(DropFontElements)(doc, &doc->root, NULL); TY_(DropFontElements)(doc, &doc->root, NULL);
TY_(WbrToSpace)(doc, &doc->root);
} }
if ((makeClean && asciiChars) || makeBare) if ((makeClean && asciiChars) || makeBare)
@ -1439,8 +1504,8 @@ int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
** **
** The big issue here is the degree to which we should mimic ** The big issue here is the degree to which we should mimic
** a DOM and/or SAX nodes. ** a DOM and/or SAX nodes.
** **
** Is it 100% possible (and, if so, how difficult is it) to ** Is it 100% possible (and, if so, how difficult is it) to
** emit SAX events from this API? If SAX events are possible, ** emit SAX events from this API? If SAX events are possible,
** is that 100% of data needed to build a DOM? ** is that 100% of data needed to build a DOM?
*/ */
@ -1571,7 +1636,7 @@ Bool TIDY_CALL tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf
TY_(PFlushLine)( doc, 0 ); TY_(PFlushLine)( doc, 0 );
doc->docOut = NULL; doc->docOut = NULL;
TidyDocFree( doc, out ); TidyDocFree( doc, out );
return yes; return yes;
} }