main code updates to do HTML5
This commit is contained in:
parent
292145c8e2
commit
78c0080eb8
|
@ -9,6 +9,13 @@
|
|||
*/
|
||||
|
||||
#include "tidy.h"
|
||||
#ifdef _MSC_VER
|
||||
#include "sprtf.h"
|
||||
#endif
|
||||
|
||||
#ifndef SPRTF
|
||||
#define SPRTF printf
|
||||
#endif
|
||||
|
||||
static FILE* errout = NULL; /* set to stderr */
|
||||
/* static FILE* txtout = NULL; */ /* set to stdout */
|
||||
|
@ -176,6 +183,9 @@ static const CmdOptDesc cmdopt_defs[] = {
|
|||
{ "-bare",
|
||||
"strip out smart quotes and em dashes, etc.",
|
||||
"bare: yes", CmdOptProcDir, "-b" },
|
||||
{ "-gdoc",
|
||||
"produce clean version of html exported by google docs",
|
||||
"gdoc: yes", CmdOptProcDir, "-g" },
|
||||
{ "-numeric",
|
||||
"output numeric rather than named entities",
|
||||
"numeric-entities: yes", CmdOptProcDir, "-n" },
|
||||
|
@ -186,8 +196,8 @@ static const CmdOptDesc cmdopt_defs[] = {
|
|||
"suppress nonessential output",
|
||||
"quiet: yes", CmdOptProcDir, "-q" },
|
||||
{ "-omit",
|
||||
"omit optional end tags",
|
||||
"hide-endtags: yes", CmdOptProcDir },
|
||||
"omit optional start tags and end tags",
|
||||
"omit-optional-tags: yes", CmdOptProcDir },
|
||||
{ "-xml",
|
||||
"specify the input is well formed XML",
|
||||
"input-xml: yes", CmdOptProcDir },
|
||||
|
@ -411,14 +421,16 @@ static void help( ctmbstr prog )
|
|||
{
|
||||
printf( "%s [option...] [file...] [option...] [file...]\n", prog );
|
||||
printf( "Utility to clean up and pretty print HTML/XHTML/XML\n");
|
||||
printf( "See http://tidy.sourceforge.net/\n");
|
||||
printf( "\n");
|
||||
|
||||
printf( "This is an HTML5-aware experimental fork of HTML Tidy.\n");
|
||||
printf( "%s\n", tidyReleaseDate() );
|
||||
printf( "\n");
|
||||
|
||||
#ifdef PLATFORM_NAME
|
||||
printf( "Options for HTML Tidy for %s released on %s:\n",
|
||||
PLATFORM_NAME, tidyReleaseDate() );
|
||||
printf( "Options for HTML Tidy for %s:\n", PLATFORM_NAME );
|
||||
#else
|
||||
printf( "Options for HTML Tidy released on %s:\n", tidyReleaseDate() );
|
||||
printf( "Options for HTML Tidy:\n");
|
||||
#endif
|
||||
printf( "\n");
|
||||
|
||||
|
@ -429,9 +441,27 @@ static void help( ctmbstr prog )
|
|||
"to the man page.\n\n");
|
||||
|
||||
printf( "Input/Output default to stdin/stdout respectively.\n");
|
||||
printf( "\n");
|
||||
printf( "Single letter options apart from -f may be combined\n");
|
||||
printf( "as in: tidy -f errs.txt -imu foo.html\n");
|
||||
printf( "For further info on HTML see http://www.w3.org/MarkUp\n");
|
||||
printf( "\n");
|
||||
printf( "For more information on this HTML5-aware experimental fork of Tidy,\n" );
|
||||
printf( "see http://w3c.github.com/tidy-html5/\n" );
|
||||
printf( "\n");
|
||||
printf( "For more information on HTML, see the following:\n" );
|
||||
printf( "\n");
|
||||
printf( " HTML: Edition for Web Authors (the latest HTML specification)\n");
|
||||
printf( " http://dev.w3.org/html5/spec-author-view\n" );
|
||||
printf( "\n");
|
||||
printf( " HTML: The Markup Language (an HTML language reference)\n" );
|
||||
printf( " http://dev.w3.org/html5/markup/\n" );
|
||||
printf( "\n");
|
||||
printf( "File bug reports at https://github.com/w3c/tidy-html5/issues/\n" );
|
||||
printf( "or send questions and comments to html-tidy@w3.org\n" );
|
||||
printf( "\n");
|
||||
printf( "Validate your HTML documents using the W3C Nu Markup Validator:\n" );
|
||||
printf( "\n");
|
||||
printf( " http://validator.w3.org/nu/" );
|
||||
printf( "\n");
|
||||
}
|
||||
|
||||
|
@ -472,6 +502,7 @@ ctmbstr ConfigCategoryName( TidyConfigCategory id )
|
|||
fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", (int)id);
|
||||
assert(0);
|
||||
abort();
|
||||
return "never_here"; /* only for the compiler warning */
|
||||
}
|
||||
|
||||
/* Description of an option */
|
||||
|
@ -898,10 +929,10 @@ static void optionvalues( TidyDoc tdoc )
|
|||
/* Print the one-line program identification to stdout.
   The text is "HTML Tidy for HTML5", followed by the platform name when
   PLATFORM_NAME is defined at build time, then the string returned by
   tidyReleaseDate().  Only the post-change wording is kept; the earlier
   "released on" format lines were left interleaved with the new ones,
   which produced an unterminated printf call. */
static void version( void )
{
#ifdef PLATFORM_NAME
    printf( "HTML Tidy for HTML5 for %s %s\n",
            PLATFORM_NAME, tidyReleaseDate() );
#else
    printf( "HTML Tidy for HTML5 %s\n", tidyReleaseDate() );
#endif
}
|
||||
|
||||
|
@ -923,6 +954,9 @@ int main( int argc, char** argv )
|
|||
|
||||
errout = stderr; /* initialize to stderr */
|
||||
status = 0;
|
||||
#ifdef _MSC_VER
|
||||
set_log_file((char *)"temptidy.txt", 0);
|
||||
#endif
|
||||
|
||||
#ifdef TIDY_CONFIG_FILE
|
||||
if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
|
||||
|
@ -977,7 +1011,7 @@ int main( int argc, char** argv )
|
|||
tidyOptResetToDefault( tdoc, TidyIndentSpaces );
|
||||
}
|
||||
else if ( strcasecmp(arg, "omit") == 0 )
|
||||
tidyOptSetBool( tdoc, TidyHideEndTags, yes );
|
||||
tidyOptSetBool( tdoc, TidyOmitOptionalTags, yes );
|
||||
|
||||
else if ( strcasecmp(arg, "upper") == 0 )
|
||||
tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
|
||||
|
@ -985,6 +1019,9 @@ int main( int argc, char** argv )
|
|||
else if ( strcasecmp(arg, "clean") == 0 )
|
||||
tidyOptSetBool( tdoc, TidyMakeClean, yes );
|
||||
|
||||
else if ( strcasecmp(arg, "gdoc") == 0 )
|
||||
tidyOptSetBool( tdoc, TidyGDocClean, yes );
|
||||
|
||||
else if ( strcasecmp(arg, "bare") == 0 )
|
||||
tidyOptSetBool( tdoc, TidyMakeBare, yes );
|
||||
|
||||
|
@ -1202,6 +1239,10 @@ int main( int argc, char** argv )
|
|||
tidyOptSetBool( tdoc, TidyMakeClean, yes );
|
||||
break;
|
||||
|
||||
case 'g':
|
||||
tidyOptSetBool( tdoc, TidyGDocClean, yes );
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
tidyOptSetBool( tdoc, TidyMakeBare, yes );
|
||||
break;
|
||||
|
@ -1237,6 +1278,7 @@ int main( int argc, char** argv )
|
|||
if ( argc > 1 )
|
||||
{
|
||||
htmlfil = argv[1];
|
||||
SPRTF("Tidying '%s'\n", htmlfil);
|
||||
if ( tidyOptGetBool(tdoc, TidyEmacs) )
|
||||
tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
|
||||
status = tidyParseFile( tdoc, htmlfil );
|
||||
|
@ -1263,10 +1305,17 @@ int main( int argc, char** argv )
|
|||
else
|
||||
{
|
||||
ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
|
||||
if ( outfil )
|
||||
if ( outfil ) {
|
||||
status = tidySaveFile( tdoc, outfil );
|
||||
else
|
||||
} else {
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
static char tmp_buf[264];
|
||||
sprintf(tmp_buf,"%s.html",get_log_file());
|
||||
status = tidySaveFile( tdoc, tmp_buf );
|
||||
#else
|
||||
status = tidySaveStdout( tdoc );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -937,6 +937,10 @@ TIDY_EXPORT Bool TIDY_CALL tidyNodeIsSTRIKE( TidyNode tnod );
|
|||
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsU( TidyNode tnod );
|
||||
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod );
|
||||
|
||||
/* HTML5 */
|
||||
TIDY_EXPORT Bool TIDY_CALL tidyNodeIsDATALIST( TidyNode tnod ); // bit like OPTIONS
|
||||
|
||||
|
||||
/** @} End NodeIsElementName group */
|
||||
|
||||
/** @} End NodeAsk group */
|
||||
|
|
|
@ -102,11 +102,14 @@ typedef enum
|
|||
TidyOutFile, /**< File name to write markup to */
|
||||
TidyWriteBack, /**< If true then output tidied markup */
|
||||
TidyShowMarkup, /**< If false, normal output is suppressed */
|
||||
TidyShowInfo, /**< If true, info-level messages are shown */
|
||||
TidyShowWarnings, /**< However errors are always shown */
|
||||
TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */
|
||||
TidyIndentContent, /**< Indent content of appropriate tags */
|
||||
/**< "auto" does text/block level content indentation */
|
||||
TidyHideEndTags, /**< Suppress optional end tags */
|
||||
TidyCoerceEndTags, /**< Coerce end tags from start tags where probably intended */
|
||||
TidyOmitOptionalTags,/**< Suppress optional start tags and end tags */
|
||||
TidyHideEndTags, /**< Legacy name for TidyOmitOptionalTags */
|
||||
TidyXmlTags, /**< Treat input as XML */
|
||||
TidyXmlOut, /**< Create output as XML */
|
||||
TidyXhtmlOut, /**< Output extensible HTML */
|
||||
|
@ -117,9 +120,11 @@ typedef enum
|
|||
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
|
||||
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
|
||||
TidyMakeClean, /**< Replace presentational clutter by style rules */
|
||||
TidyGDocClean, /**< Clean up HTML exported from Google Docs */
|
||||
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
|
||||
TidyDropPropAttrs, /**< Discard proprietary attributes */
|
||||
TidyDropFontTags, /**< Discard presentation tags */
|
||||
TidyDropEmptyElems, /**< Discard empty elements */
|
||||
TidyDropEmptyParas, /**< Discard empty p elements */
|
||||
TidyFixComments, /**< Fix comments with adjacent hyphens */
|
||||
TidyBreakBeforeBR, /**< Output newline before <br> or not? */
|
||||
|
@ -192,6 +197,7 @@ typedef enum
|
|||
#else
|
||||
TidyPunctWrapNotUsed,
|
||||
#endif
|
||||
TidyMergeEmphasis, /**< Merge nested B and I elements */
|
||||
TidyMergeDivs, /**< Merge multiple DIVs */
|
||||
TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */
|
||||
TidyPreserveEntities, /**< Preserve entities */
|
||||
|
@ -234,6 +240,7 @@ typedef enum
|
|||
*/
|
||||
typedef enum
|
||||
{
|
||||
TidyDoctypeHtml5, /**< <!DOCTYPE html> */
|
||||
TidyDoctypeOmit, /**< Omit DOCTYPE altogether */
|
||||
TidyDoctypeAuto, /**< Keep DOCTYPE in input. Set version to content */
|
||||
TidyDoctypeStrict, /**< Convert document to HTML 4 strict content model */
|
||||
|
@ -436,16 +443,20 @@ typedef enum
|
|||
TidyTag_ARTICLE,
|
||||
TidyTag_ASIDE,
|
||||
TidyTag_AUDIO,
|
||||
TidyTag_BDI,
|
||||
TidyTag_CANVAS,
|
||||
TidyTag_COMMAND,
|
||||
TidyTag_DATALIST,
|
||||
TidyTag_DETAILS,
|
||||
TidyTag_DIALOG,
|
||||
TidyTag_FIGCAPTION,
|
||||
TidyTag_FIGURE,
|
||||
TidyTag_FOOTER,
|
||||
TidyTag_HEADER,
|
||||
TidyTag_HGROUP,
|
||||
TidyTag_MAIN,
|
||||
TidyTag_MARK,
|
||||
TidyTag_MENUITEM,
|
||||
TidyTag_METER,
|
||||
TidyTag_NAV,
|
||||
TidyTag_OUTPUT,
|
||||
|
@ -531,6 +542,7 @@ typedef enum
|
|||
TidyAttr_HTTP_EQUIV, /**< HTTP_EQUIV= */
|
||||
TidyAttr_ID, /**< ID= */
|
||||
TidyAttr_ISMAP, /**< ISMAP= */
|
||||
TidyAttr_ITEMPROP, /**< ITEMPROP= */
|
||||
TidyAttr_LABEL, /**< LABEL= */
|
||||
TidyAttr_LANG, /**< LANG= */
|
||||
TidyAttr_LANGUAGE, /**< LANGUAGE= */
|
||||
|
|
488
src/attrdict.c
488
src/attrdict.c
File diff suppressed because it is too large
Load diff
|
@ -125,14 +125,18 @@ extern const AttrVersion TY_(W3CAttrsFor_HGROUP)[];
|
|||
extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BDI)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NAV)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SECTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HEADER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DIALOG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MAIN)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MARK)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MENUITEM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_METER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TIME)[];
|
||||
|
@ -141,5 +145,8 @@ extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
|
|||
extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_EMBED)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_KEYGEN)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_WBR)[];
|
||||
|
||||
#endif /* __ATTRDICT_H__ */
|
||||
|
|
39
src/attrs.c
39
src/attrs.c
|
@ -152,6 +152,7 @@ static const Attribute attribute_defs [] =
|
|||
{ TidyAttr_HTTP_EQUIV, "http-equiv", CH_PCDATA }, /* META */
|
||||
{ TidyAttr_ID, "id", CH_IDDEF },
|
||||
{ TidyAttr_ISMAP, "ismap", CH_BOOL }, /* IMG */
|
||||
{ TidyAttr_ITEMPROP, "itemprop", CH_PCDATA },
|
||||
{ TidyAttr_LABEL, "label", CH_PCDATA }, /* OPT, OPTGROUP */
|
||||
{ TidyAttr_LANG, "lang", CH_LANG },
|
||||
{ TidyAttr_LANGUAGE, "language", CH_PCDATA }, /* SCRIPT */
|
||||
|
@ -253,7 +254,7 @@ static const Attribute attribute_defs [] =
|
|||
{ TidyAttr_SDASUFF, "sdasuff", CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
|
||||
{ TidyAttr_URN, "urn", CH_PCDATA }, /* for <a>, never implemented */
|
||||
|
||||
/* "HTML5" */
|
||||
/* HTML5 */
|
||||
{ TidyAttr_ASYNC, "async", CH_PCDATA },
|
||||
{ TidyAttr_AUTOCOMPLETE, "autocomplete", CH_PCDATA },
|
||||
{ TidyAttr_AUTOFOCUS, "autofocus", CH_PCDATA },
|
||||
|
@ -362,7 +363,7 @@ static uint AttributeVersions(Node* node, AttVal* attval)
|
|||
{
|
||||
uint i;
|
||||
|
||||
/* "HTML5" data-* attributes */
|
||||
/* HTML5 data-* attributes */
|
||||
if (attval && attval->attribute)
|
||||
if (TY_(tmbstrncmp)(attval->attribute, "data-", 5) == 0)
|
||||
return (XH50 | HT50);
|
||||
|
@ -744,6 +745,27 @@ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
|
|||
return attr;
|
||||
}
|
||||
|
||||
/* Remove the first attribute of node whose name compares equal to `name`
   under TY_(tmbstrcmp), and release it with TY_(FreeAttribute).
   Attributes whose name pointer is NULL are skipped.  If nothing matches,
   the attribute list is left untouched. */
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name )
{
    /* link addresses whichever pointer currently refers to the attribute
       being inspected: node->attributes first, then each ->next field */
    AttVal **link = &node->attributes;

    while ( *link != NULL )
    {
        AttVal *cur = *link;

        if ( cur->attribute && TY_(tmbstrcmp)(cur->attribute, name) == 0 )
        {
            /* unlink before freeing so the list never points at freed memory */
            *link = cur->next;
            TY_(FreeAttribute)( doc, cur );
            return;
        }

        link = &cur->next;
    }
}
|
||||
|
||||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
||||
Node *node, ctmbstr name, ctmbstr value )
|
||||
{
|
||||
|
@ -1360,11 +1382,8 @@ Bool TY_(IsValidHTMLID)(ctmbstr id)
|
|||
if (!s)
|
||||
return no;
|
||||
|
||||
if (!TY_(IsLetter)(*s++))
|
||||
return no;
|
||||
|
||||
while (*s)
|
||||
if (!TY_(IsNamechar)(*s++))
|
||||
if (TY_(IsHTMLSpace)(*s++))
|
||||
return no;
|
||||
|
||||
return yes;
|
||||
|
@ -1807,9 +1826,11 @@ void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval)
|
|||
/* checks type attribute */
|
||||
void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval)
|
||||
{
|
||||
ctmbstr const valuesINPUT[] = {"text", "password", "checkbox", "radio",
|
||||
"submit", "reset", "file", "hidden",
|
||||
"image", "button", NULL};
|
||||
ctmbstr const valuesINPUT[] = {
|
||||
"text", "password", "checkbox", "radio", "submit", "reset", "file",
|
||||
"hidden", "image", "button", "color", "date", "datetime",
|
||||
"datetime-local", "email", "month", "number", "range", "search",
|
||||
"tel", "time", "url", "week", NULL};
|
||||
ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL};
|
||||
ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL};
|
||||
ctmbstr const valuesOL[] = {"1", "a", "i", NULL};
|
||||
|
|
|
@ -81,6 +81,8 @@ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
|
|||
|
||||
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
|
||||
|
||||
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
|
||||
|
||||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
||||
Node *node, ctmbstr name, ctmbstr value );
|
||||
|
||||
|
@ -217,6 +219,7 @@ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
|
|||
#define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV )
|
||||
#define attrIsID(av) AttrIsId( av, TidyAttr_ID )
|
||||
#define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP )
|
||||
#define attrIsITEMPROP(av) AttrIsId( av, TidyAttr_ITEMPROP )
|
||||
#define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL )
|
||||
#define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG )
|
||||
#define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE )
|
||||
|
|
22
src/config.c
22
src/config.c
|
@ -4,9 +4,6 @@
|
|||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
config files associate a property name with a value.
|
||||
|
||||
// comments can start at the beginning of a line
|
||||
|
@ -130,6 +127,7 @@ static const ctmbstr newlinePicks[] =
|
|||
|
||||
static const ctmbstr doctypePicks[] =
|
||||
{
|
||||
"html5",
|
||||
"omit",
|
||||
"auto",
|
||||
"strict",
|
||||
|
@ -200,7 +198,7 @@ static ParseProperty ParseSorter;
|
|||
static ParseProperty ParseCharEnc;
|
||||
static ParseProperty ParseNewline;
|
||||
|
||||
/* omit | auto | strict | loose | <fpi> */
|
||||
/* html5 | omit | auto | strict | loose | <fpi> */
|
||||
static ParseProperty ParseDocType;
|
||||
|
||||
/* keep-first or keep-last? */
|
||||
|
@ -213,9 +211,9 @@ static const TidyOptionImpl option_defs[] =
|
|||
{ TidyIndentSpaces, PP, "indent-spaces", IN, 2, ParseInt, NULL },
|
||||
{ TidyWrapLen, PP, "wrap", IN, 68, ParseInt, NULL },
|
||||
{ TidyTabSize, PP, "tab-size", IN, 8, ParseInt, NULL },
|
||||
{ TidyCharEncoding, CE, "char-encoding", IN, ASCII, ParseCharEnc, charEncPicks },
|
||||
{ TidyInCharEncoding, CE, "input-encoding", IN, LATIN1, ParseCharEnc, charEncPicks },
|
||||
{ TidyOutCharEncoding, CE, "output-encoding", IN, ASCII, ParseCharEnc, charEncPicks },
|
||||
{ TidyCharEncoding, CE, "char-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
|
||||
{ TidyInCharEncoding, CE, "input-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
|
||||
{ TidyOutCharEncoding, CE, "output-encoding", IN, UTF8, ParseCharEnc, charEncPicks },
|
||||
{ TidyNewline, CE, "newline", IN, DLF, ParseNewline, newlinePicks },
|
||||
{ TidyDoctypeMode, MU, "doctype-mode", IN, TidyDoctypeAuto, NULL, doctypePicks },
|
||||
{ TidyDoctype, MU, "doctype", ST, 0, ParseDocType, doctypePicks },
|
||||
|
@ -229,9 +227,12 @@ static const TidyOptionImpl option_defs[] =
|
|||
{ TidyOutFile, MS, "output-file", ST, 0, ParseString, NULL },
|
||||
{ TidyWriteBack, MS, "write-back", BL, no, ParseBool, boolPicks },
|
||||
{ TidyShowMarkup, PP, "markup", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyShowInfo, DG, "show-info", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks },
|
||||
{ TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks },
|
||||
{ TidyCoerceEndTags, MU, "coerce-endtags", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyOmitOptionalTags, MU, "omit-optional-tags", BL, no, ParseBool, boolPicks },
|
||||
{ TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks },
|
||||
{ TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks },
|
||||
{ TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks },
|
||||
|
@ -242,9 +243,11 @@ static const TidyOptionImpl option_defs[] =
|
|||
{ TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
|
||||
{ TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
|
||||
{ TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
|
||||
{ TidyGDocClean, MU, "gdoc", BL, no, ParseBool, boolPicks },
|
||||
{ TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
|
||||
{ TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
|
||||
{ TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
|
||||
{ TidyDropEmptyElems, MU, "drop-empty-elements", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyDropEmptyParas, MU, "drop-empty-paras", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyFixComments, MU, "fix-bad-comments", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyBreakBeforeBR, PP, "break-before-br", BL, no, ParseBool, boolPicks },
|
||||
|
@ -303,6 +306,7 @@ static const TidyOptionImpl option_defs[] =
|
|||
#if SUPPORT_ASIAN_ENCODINGS
|
||||
{ TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks },
|
||||
#endif
|
||||
{ TidyMergeEmphasis, MU, "merge-emphasis", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
|
||||
{ TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks },
|
||||
{ TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks },
|
||||
|
@ -1425,7 +1429,7 @@ ctmbstr TY_(CharEncodingOptName)( int encoding )
|
|||
}
|
||||
|
||||
/*
|
||||
doctype: omit | auto | strict | loose | <fpi>
|
||||
doctype: html5 | omit | auto | strict | loose | <fpi>
|
||||
|
||||
where the fpi is a string similar to
|
||||
|
||||
|
@ -1462,6 +1466,8 @@ Bool ParseDocType( TidyDocImpl* doc, const TidyOptionImpl* option )
|
|||
|
||||
if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 )
|
||||
dtmode = TidyDoctypeAuto;
|
||||
else if ( TY_(tmbstrcasecmp)(buf, "html5") == 0 )
|
||||
dtmode = TidyDoctypeHtml5;
|
||||
else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 )
|
||||
dtmode = TidyDoctypeOmit;
|
||||
else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 )
|
||||
|
|
191
src/lexer.c
191
src/lexer.c
|
@ -39,6 +39,13 @@
|
|||
#include "clean.h"
|
||||
#include "utf8.h"
|
||||
#include "streamio.h"
|
||||
#ifdef _MSC_VER
|
||||
#include "sprtf.h"
|
||||
#endif
|
||||
|
||||
#ifndef SPRTF
|
||||
#define SPRTF printf
|
||||
#endif
|
||||
|
||||
/* Forward references
|
||||
*/
|
||||
|
@ -113,6 +120,9 @@ int TY_(HTMLVersion)(TidyDocImpl* doc)
|
|||
!cfgBool(doc, TidyHtmlOut);
|
||||
Bool html4 = dtmode == TidyDoctypeStrict || dtmode == TidyDoctypeLoose || VERS_FROM40 & dtver;
|
||||
|
||||
if (xhtml && dtver == VERS_UNKNOWN) return XH50;
|
||||
if (dtver == VERS_UNKNOWN) return HT50;
|
||||
|
||||
for (i = 0; W3C_Doctypes[i].name; ++i)
|
||||
{
|
||||
if ((xhtml && !(VERS_XHTML & W3C_Doctypes[i].vers)) ||
|
||||
|
@ -171,7 +181,7 @@ static uint GetVersFromFPI(ctmbstr fpi)
|
|||
uint i;
|
||||
|
||||
for (i = 0; W3C_Doctypes[i].name; ++i)
|
||||
if (TY_(tmbstrcasecmp)(W3C_Doctypes[i].fpi, fpi) == 0)
|
||||
if (W3C_Doctypes[i].fpi != NULL && TY_(tmbstrcasecmp)(W3C_Doctypes[i].fpi, fpi) == 0)
|
||||
return W3C_Doctypes[i].vers;
|
||||
|
||||
return 0;
|
||||
|
@ -224,6 +234,11 @@ Bool TY_(IsLetter)(uint c)
|
|||
return (map & letter)!=0;
|
||||
}
|
||||
|
||||
/* Report whether c is one of the five code points tested here:
   U+0020 SPACE, U+0009 TAB, U+000A LINE FEED, U+000C FORM FEED or
   U+000D CARRIAGE RETURN.  Any other value yields no. */
Bool TY_(IsHTMLSpace)(uint c)
{
    switch (c)
    {
    case 0x020:     /* space */
    case 0x009:     /* horizontal tab */
    case 0x00a:     /* line feed */
    case 0x00c:     /* form feed */
    case 0x00d:     /* carriage return */
        return yes;

    default:
        return no;
    }
}
|
||||
|
||||
Bool TY_(IsNamechar)(uint c)
|
||||
{
|
||||
uint map = MAP(c);
|
||||
|
@ -1393,10 +1408,10 @@ Bool TY_(AddGenerator)( TidyDocImpl* doc )
|
|||
if (head)
|
||||
{
|
||||
#ifdef PLATFORM_NAME
|
||||
TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for "PLATFORM_NAME" (vers %s), see www.w3.org",
|
||||
TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for HTML5 (experimental) for "PLATFORM_NAME" %s",
|
||||
tidyReleaseDate());
|
||||
#else
|
||||
TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy (vers %s), see www.w3.org", tidyReleaseDate());
|
||||
TY_(tmbsnprintf)(buf, sizeof(buf), "HTML Tidy for HTML5 (experimental) %s", tidyReleaseDate());
|
||||
#endif
|
||||
|
||||
for ( node = head->content; node; node = node->next )
|
||||
|
@ -1562,6 +1577,12 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
|
|||
|
||||
switch(dtmode)
|
||||
{
|
||||
case TidyDoctypeHtml5:
|
||||
/* HTML5 */
|
||||
TY_(RepairAttrValue)(doc, doctype, pub, NULL);
|
||||
TY_(RepairAttrValue)(doc, doctype, sys, NULL);
|
||||
lexer->versionEmitted = XH50;
|
||||
break;
|
||||
case TidyDoctypeStrict:
|
||||
/* XHTML 1.0 Strict */
|
||||
TY_(RepairAttrValue)(doc, doctype, pub, GetFPIFromVers(X10S));
|
||||
|
@ -1580,7 +1601,11 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
|
|||
TY_(RepairAttrValue)(doc, doctype, sys, "");
|
||||
break;
|
||||
case TidyDoctypeAuto:
|
||||
if (lexer->versions & XH11 && lexer->doctype == XH11)
|
||||
if (lexer->doctype == VERS_UNKNOWN) {
|
||||
lexer->versionEmitted = XH50;
|
||||
return yes;
|
||||
}
|
||||
else if (lexer->versions & XH11 && lexer->doctype == XH11)
|
||||
{
|
||||
if (!TY_(GetAttrByName)(doctype, sys))
|
||||
TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(XH11));
|
||||
|
@ -1618,10 +1643,6 @@ Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc )
|
|||
TY_(RepairAttrValue)(doc, doctype, sys, GetSIFromVers(X10T));
|
||||
lexer->versionEmitted = X10T;
|
||||
}
|
||||
else if (lexer->versions & XH50)
|
||||
{
|
||||
lexer->versionEmitted = XH50;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (doctype)
|
||||
|
@ -1678,6 +1699,9 @@ Bool TY_(FixDocType)( TidyDocImpl* doc )
|
|||
|
||||
switch (dtmode)
|
||||
{
|
||||
case TidyDoctypeHtml5:
|
||||
guessed = HT50;
|
||||
break;
|
||||
case TidyDoctypeStrict:
|
||||
guessed = H41S;
|
||||
break;
|
||||
|
@ -2010,6 +2034,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode );
|
|||
|
||||
Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode )
|
||||
{
|
||||
Node *node;
|
||||
Lexer* lexer = doc->lexer;
|
||||
|
||||
if (lexer->pushed || lexer->itoken)
|
||||
|
@ -2030,33 +2055,61 @@ Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode )
|
|||
/* duplicate inlines in preference to pushed text nodes when appropriate */
|
||||
lexer->pushed = no;
|
||||
if (lexer->token->type != TextNode
|
||||
|| !(lexer->insert || lexer->inode))
|
||||
return lexer->token;
|
||||
return lexer->itoken = TY_(InsertedToken)( doc );
|
||||
|| !(lexer->insert || lexer->inode)) {
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning pushed token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node;
|
||||
}
|
||||
lexer->itoken = TY_(InsertedToken)( doc );
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning inserted token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node;
|
||||
}
|
||||
|
||||
assert( !(lexer->pushed || lexer->itoken) );
|
||||
|
||||
/* at start of block elements, unclosed inline
|
||||
elements are inserted into the token stream */
|
||||
if (lexer->insert || lexer->inode)
|
||||
return lexer->token = TY_(InsertedToken)( doc );
|
||||
if (lexer->insert || lexer->inode) {
|
||||
lexer->token = TY_(InsertedToken)( doc );
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning Inserted token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node;
|
||||
}
|
||||
|
||||
if (mode == CdataContent)
|
||||
{
|
||||
assert( lexer->parent != NULL );
|
||||
return GetCDATA(doc, lexer->parent);
|
||||
node = GetCDATA(doc, lexer->parent);
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning Cdatacontent token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node;
|
||||
}
|
||||
|
||||
return GetTokenFromStream( doc, mode );
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
/* Debug-only helper (compiled only for MSVC builds without NDEBUG):
   writes "Have node <name>" through SPRTF.
   name - element name to report; it is only read, hence const.  Callers
          must pass a non-NULL string because it is formatted with %s.
   NOTE(review): presumably kept as a convenient breakpoint anchor while
   tracing specific elements - confirm before removing. */
static void check_me(const char *name)
{
    SPRTF("Have node %s\n", name);
}
#endif
|
||||
|
||||
static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
||||
{
|
||||
Lexer* lexer = doc->lexer;
|
||||
uint c, badcomment = 0;
|
||||
Bool isempty = no;
|
||||
AttVal *attributes = NULL;
|
||||
Node *node;
|
||||
|
||||
/* Lexer->token must be set on return. Nullify it for safety. */
|
||||
lexer->token = NULL;
|
||||
|
@ -2170,7 +2223,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
#ifdef TIDY_STORE_ORIGINAL_TEXT
|
||||
StoreOriginalTextInToken(doc, lexer->token, 3);
|
||||
#endif
|
||||
return lexer->token;
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning text token len %d...\n", node->end - node->start );
|
||||
#endif
|
||||
return node;
|
||||
}
|
||||
|
||||
continue; /* no text so keep going */
|
||||
|
@ -2397,7 +2454,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
#ifdef TIDY_STORE_ORIGINAL_TEXT
|
||||
StoreOriginalTextInToken(doc, lexer->token, 0); /* hmm... */
|
||||
#endif
|
||||
return lexer->token; /* the endtag token */
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning endtag token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the endtag token */
|
||||
|
||||
case LEX_STARTTAG: /* first letter of tagname */
|
||||
c = TY_(ReadChar)(doc->docIn);
|
||||
|
@ -2471,7 +2532,19 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
#ifdef TIDY_STORE_ORIGINAL_TEXT
|
||||
StoreOriginalTextInToken(doc, lexer->token, 0);
|
||||
#endif
|
||||
return lexer->token; /* return start tag */
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning starttag token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
if (node->element) {
|
||||
//if (stricmp(node->element,"datalist") == 0) {
|
||||
// check_me(node->element);
|
||||
//} else
|
||||
if (stricmp(node->element,"option") == 0) {
|
||||
check_me(node->element);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return node; /* return start tag */
|
||||
|
||||
case LEX_COMMENT: /* seen <!-- so look for --> */
|
||||
|
||||
|
@ -2509,7 +2582,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
else
|
||||
TY_(UngetChar)(c, doc->docIn);
|
||||
|
||||
return lexer->token;
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning comment token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node;
|
||||
}
|
||||
|
||||
/* note position of first such error in the comment */
|
||||
|
@ -2554,7 +2631,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
/* make a note of the version named by the 1st doctype */
|
||||
if (lexer->doctype == VERS_UNKNOWN && lexer->token && !cfgBool(doc, TidyXmlTags))
|
||||
lexer->doctype = FindGivenVersion(doc, lexer->token);
|
||||
return lexer->token;
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning doctype token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node;
|
||||
|
||||
case LEX_PROCINSTR: /* seen <? so look for '>' */
|
||||
/* check for PHP preprocessor instructions <?php ... ?> */
|
||||
|
@ -2636,7 +2717,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
|
||||
lexer->state = LEX_CONTENT;
|
||||
lexer->waswhite = no;
|
||||
return lexer->token;
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning procinstr token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node;
|
||||
|
||||
case LEX_ASP: /* seen <% so look for "%>" */
|
||||
if (c != '%')
|
||||
|
@ -2657,7 +2742,14 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
lexer->lexbuf[lexer->lexsize] = '\0';
|
||||
lexer->state = LEX_CONTENT;
|
||||
lexer->waswhite = no;
|
||||
return lexer->token = AspToken(doc);
|
||||
lexer->token = AspToken(doc);
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning ASP token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the ASP token */
|
||||
|
||||
|
||||
|
||||
case LEX_JSTE: /* seen <# so look for "#>" */
|
||||
if (c != '#')
|
||||
|
@ -2678,7 +2770,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
lexer->lexbuf[lexer->lexsize] = '\0';
|
||||
lexer->state = LEX_CONTENT;
|
||||
lexer->waswhite = no;
|
||||
return lexer->token = JsteToken(doc);
|
||||
lexer->token = JsteToken(doc);
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning JSTE token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the JSTE token */
|
||||
|
||||
|
||||
case LEX_PHP: /* seen "<?php" so look for "?>" */
|
||||
if (c != '?')
|
||||
|
@ -2698,7 +2796,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
lexer->lexbuf[lexer->lexsize] = '\0';
|
||||
lexer->state = LEX_CONTENT;
|
||||
lexer->waswhite = no;
|
||||
return lexer->token = PhpToken(doc);
|
||||
lexer->token = PhpToken(doc);
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning PHP token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the PHP token */
|
||||
|
||||
case LEX_XMLDECL: /* seen "<?xml" so look for "?>" */
|
||||
|
||||
|
@ -2728,7 +2831,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
lexer->waswhite = no;
|
||||
lexer->token = XmlDeclToken(doc);
|
||||
lexer->token->attributes = attributes;
|
||||
return lexer->token;
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning xml token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the xml token */
|
||||
}
|
||||
|
||||
av = TY_(NewAttribute)(doc);
|
||||
|
@ -2756,7 +2863,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
lexer->waswhite = no;
|
||||
lexer->token = XmlDeclToken(doc);
|
||||
lexer->token->attributes = attributes;
|
||||
return lexer->token;
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning XML token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the XML token */
|
||||
|
||||
case LEX_SECTION: /* seen "<![" so look for "]>" */
|
||||
if (c == '[')
|
||||
|
@ -2787,7 +2898,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
lexer->lexbuf[lexer->lexsize] = '\0';
|
||||
lexer->state = LEX_CONTENT;
|
||||
lexer->waswhite = no;
|
||||
return lexer->token = SectionToken(doc);
|
||||
lexer->token = SectionToken(doc);
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning SECTION token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the SECTION token */
|
||||
|
||||
case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */
|
||||
if (c != ']')
|
||||
|
@ -2817,7 +2933,12 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
lexer->lexbuf[lexer->lexsize] = '\0';
|
||||
lexer->state = LEX_CONTENT;
|
||||
lexer->waswhite = no;
|
||||
return lexer->token = CDATAToken(doc);
|
||||
lexer->token = CDATAToken(doc);
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning CDATA token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the CDATA token */
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2838,7 +2959,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
#ifdef TIDY_STORE_ORIGINAL_TEXT
|
||||
StoreOriginalTextInToken(doc, lexer->token, 0); /* ? */
|
||||
#endif
|
||||
return lexer->token;
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning textstring token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the textstring token */
|
||||
}
|
||||
}
|
||||
else if (lexer->state == LEX_COMMENT) /* comment */
|
||||
|
@ -2850,9 +2975,17 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
lexer->lexbuf[lexer->lexsize] = '\0';
|
||||
lexer->state = LEX_CONTENT;
|
||||
lexer->waswhite = no;
|
||||
return lexer->token = CommentToken(doc);
|
||||
lexer->token = CommentToken(doc);
|
||||
node = lexer->token;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning COMMENT token '%s'...\n", node->element ? node->element : "<blank>");
|
||||
#endif
|
||||
return node; /* the COMMENT token */
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Returning NULL...\n");
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -6,9 +6,6 @@
|
|||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
Given an input source, it returns a sequence of tokens.
|
||||
|
||||
GetToken(source) gets the next token
|
||||
|
@ -189,7 +186,7 @@ typedef enum
|
|||
/* special flag */
|
||||
#define VERS_XML 65536u
|
||||
|
||||
/* "HTML5" */
|
||||
/* HTML5 */
|
||||
#define HT50 131072u
|
||||
#define XH50 262144u
|
||||
|
||||
|
@ -202,6 +199,8 @@ typedef enum
|
|||
#define VERS_FRAMESET (H40F|H41F|X10F)
|
||||
#define VERS_XHTML11 (XH11)
|
||||
#define VERS_BASIC (XB10)
|
||||
/* HTML5 */
|
||||
#define VERS_HTML5 (HT50|XH50)
|
||||
|
||||
/* meta symbols */
|
||||
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET)
|
||||
|
@ -411,6 +410,7 @@ void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
|
|||
Bool TY_(IsWhite)(uint c);
|
||||
Bool TY_(IsDigit)(uint c);
|
||||
Bool TY_(IsLetter)(uint c);
|
||||
Bool TY_(IsHTMLSpace)(uint c);
|
||||
Bool TY_(IsNewline)(uint c);
|
||||
Bool TY_(IsNamechar)(uint c);
|
||||
Bool TY_(IsXMLLetter)(uint c);
|
||||
|
|
112
src/localize.c
112
src/localize.c
|
@ -101,6 +101,8 @@ static struct _msgfmt
|
|||
{ NESTED_QUOTATION, "nested q elements, possible typo." }, /* Warning */
|
||||
{ OBSOLETE_ELEMENT, "replacing obsolete element %s by %s" }, /* Warning */
|
||||
{ COERCE_TO_ENDTAG_WARN, "<%s> is probably intended as </%s>" }, /* Warning */
|
||||
/* HTML5 */
|
||||
{ REMOVED_HTML5, "%s element removed from HTML5" }, /* Warning */
|
||||
|
||||
/* ReportNotice */
|
||||
{ TRIM_EMPTY_ELEMENT, "trimming empty %s" }, /* Notice */
|
||||
|
@ -320,7 +322,7 @@ static const TidyOptionId TidyIndentContentLinks[] =
|
|||
static const TidyOptionId TidyIndentSpacesLinks[] =
|
||||
{ TidyIndentContent, TidyUnknownOption };
|
||||
static const TidyOptionId TidyWrapAttValsLinks[] =
|
||||
{ TidyWrapScriptlets, TidyUnknownOption };
|
||||
{ TidyWrapScriptlets, TidyLiteralAttribs, TidyUnknownOption };
|
||||
static const TidyOptionId TidyWrapScriptletsLinks[] =
|
||||
{ TidyWrapAttVals, TidyUnknownOption };
|
||||
static const TidyOptionId TidyCharEncodingLinks[] =
|
||||
|
@ -353,6 +355,8 @@ static const TidyOptionId TidyDropFontTagsLinks[] =
|
|||
{ TidyMakeClean, TidyUnknownOption };
|
||||
static const TidyOptionId TidyMakeCleanTagsLinks[] =
|
||||
{ TidyDropFontTags, TidyUnknownOption };
|
||||
static const TidyOptionId TidyGDocCleanLinks[] =
|
||||
{ TidyMakeClean, TidyUnknownOption };
|
||||
|
||||
/* Documentation of options */
|
||||
static const TidyOptionDoc option_docs[] =
|
||||
|
@ -399,14 +403,24 @@ static const TidyOptionDoc option_docs[] =
|
|||
"on the HTML saved by Microsoft Office products. "
|
||||
, TidyMakeCleanTagsLinks
|
||||
},
|
||||
{TidyGDocClean,
|
||||
"This option specifies if Tidy "
|
||||
"should enable specific behavior for cleaning up HTML exported from "
|
||||
"Google Docs. "
|
||||
, TidyMakeCleanTagsLinks
|
||||
},
|
||||
{TidyDoctype,
|
||||
"This option specifies the DOCTYPE declaration generated by Tidy. If set "
|
||||
"to \"omit\" the output won't contain a DOCTYPE declaration. If set to "
|
||||
"\"auto\" (the default) Tidy will use an educated guess based upon the "
|
||||
"contents of the document. If set to \"strict\", Tidy will set the DOCTYPE "
|
||||
"to the strict DTD. If set to \"loose\", the DOCTYPE is set to the loose "
|
||||
"(transitional) DTD. Alternatively, you can supply a string for the formal "
|
||||
"public identifier (FPI).<br />"
|
||||
"This option specifies the DOCTYPE declaration generated by Tidy.<br />"
|
||||
"If set to \"omit\" the output won't contain a DOCTYPE declaration.<br />"
|
||||
"If set to \"html5\" the DOCTYPE is set to \"<!DOCTYPE html>\".<br />"
|
||||
"If set to \"auto\" (the default) Tidy will use an educated guess based "
|
||||
"upon the contents of the document.<br />"
|
||||
"If set to \"strict\", Tidy will set the DOCTYPE to the HTML4 or XHTML1 "
|
||||
"strict DTD.<br />"
|
||||
"If set to \"loose\", the DOCTYPE is set to the HTML4 or XHTML1 loose "
|
||||
"(transitional) DTD. <br />"
|
||||
"Alternatively, you can supply a string for the formal public identifier "
|
||||
"(FPI).<br />"
|
||||
"<br />"
|
||||
"For example: <br />"
|
||||
"doctype: \"-//ACME//DTD HTML 3.14159//EN\"<br />"
|
||||
|
@ -419,6 +433,9 @@ static const TidyOptionDoc option_docs[] =
|
|||
"<code>--numeric-entities yes</code>. This option does not offer a "
|
||||
"validation of the document conformance. "
|
||||
},
|
||||
{TidyDropEmptyElems,
|
||||
"This option specifies if Tidy should discard empty elements. "
|
||||
},
|
||||
{TidyDropEmptyParas,
|
||||
"This option specifies if Tidy should discard empty paragraphs. "
|
||||
},
|
||||
|
@ -460,10 +477,22 @@ static const TidyOptionDoc option_docs[] =
|
|||
{TidyHideComments,
|
||||
"This option specifies if Tidy should print out comments. "
|
||||
},
|
||||
{TidyCoerceEndTags,
|
||||
"This option specifies if Tidy should coerce a start tag into an end tag "
|
||||
"in cases where it looks like an end tag was probably intended; "
|
||||
"for example, given <span>foo <b>bar<b> baz</span>, "
|
||||
"Tidy will output <span>foo <b>bar</b> baz</span>. "
|
||||
},
|
||||
{TidyOmitOptionalTags,
|
||||
"This option specifies if Tidy should omit optional start tags and end tags "
|
||||
"when generating output. Setting this option causes all tags for the "
|
||||
"html, head, and body elements to be omitted from output, as well as such "
|
||||
"end tags as </p>, </li>, </dt>, </dd>, "
|
||||
"</option>, </tr>, </td>, and </th>. "
|
||||
"This option is ignored for XML output. "
|
||||
},
|
||||
{TidyHideEndTags,
|
||||
"This option specifies if Tidy should omit optional end-tags when "
|
||||
"generating the pretty printed markup. This option is ignored if you are "
|
||||
"outputting to XML. "
|
||||
"This option is an alias for omit-optional-tags. "
|
||||
},
|
||||
{TidyIndentCdata,
|
||||
"This option specifies if Tidy should indent <![CDATA[]]> sections. "
|
||||
|
@ -494,6 +523,12 @@ static const TidyOptionDoc option_docs[] =
|
|||
"that takes a list of predefined values to lower case. This is required "
|
||||
"for XHTML documents. "
|
||||
},
|
||||
{TidyMergeEmphasis,
|
||||
"This option specifies if Tidy should merge nested <b> and <i> "
|
||||
"elements; for example, for the case "
|
||||
"<b class=\"rtop-2\">foo <b class=\"r2-2\">bar</b> baz</b>, "
|
||||
"Tidy will output <b class=\"rtop-2\">foo bar baz</b>. "
|
||||
},
|
||||
{TidyMergeDivs,
|
||||
"Can be used to modify behavior of -c (--clean yes) option. "
|
||||
"This option specifies if Tidy should merge nested <div> such as "
|
||||
|
@ -644,6 +679,9 @@ static const TidyOptionDoc option_docs[] =
|
|||
"This option specifies the number Tidy uses to determine if further errors "
|
||||
"should be shown. If set to 0, then no errors are shown. "
|
||||
},
|
||||
{TidyShowInfo,
|
||||
"This option specifies if Tidy should display info-level messages. "
|
||||
},
|
||||
{TidyShowWarnings,
|
||||
"This option specifies if Tidy should suppress warnings. This can be "
|
||||
"useful when a few errors are hidden in a flurry of warnings. "
|
||||
|
@ -670,8 +708,14 @@ static const TidyOptionDoc option_docs[] =
|
|||
,TidyIndentSpacesLinks
|
||||
},
|
||||
{TidyLiteralAttribs,
|
||||
"This option specifies if Tidy should ensure that whitespace characters "
|
||||
"within attribute values are passed through unchanged. "
|
||||
"This option specifies how Tidy deals with whitespace characters within "
|
||||
"attribute values. If the value is \"no\" (the default), Tidy \"munges\" "
|
||||
"or \"normalizes\" attribute values by replacing any newline or tab "
|
||||
"character with a single space character, and further by replacing "
|
||||
"any sequences of multiple whitespace characters with a single space. "
|
||||
"To force tidy to preserve the original, literal values of all attributes, "
|
||||
"and ensure that whitespace characters within attribute values are passed "
|
||||
"through unchanged, set this option to \"yes\". "
|
||||
},
|
||||
{TidyShowMarkup,
|
||||
"This option specifies if Tidy should generate a pretty printed version "
|
||||
|
@ -706,9 +750,18 @@ static const TidyOptionDoc option_docs[] =
|
|||
"pseudo elements, which look like: <% ... %>. "
|
||||
},
|
||||
{TidyWrapAttVals,
|
||||
"This option specifies if Tidy should line wrap attribute values, for "
|
||||
"easier editing. This option can be set independently of "
|
||||
"wrap-script-literals. "
|
||||
"This option specifies if Tidy should line-wrap attribute values, for "
|
||||
"easier editing. Line wrapping means that if the value of an attribute "
|
||||
"causes a line to exceed the width specified by the \"wrap\" option, "
|
||||
"tidy will add one or more line breaks to the value, causing it to "
|
||||
"wrapped into multiple lines. Note that this option can be set "
|
||||
"independently of wrap-script-literals. Also note that by default, Tidy "
|
||||
"\"munges\" or \"normalizes\" attribute values by replacing any newline "
|
||||
"or tab character with a single space character, and further by replacing "
|
||||
"any sequences of multiple whitespace characters with a single space. "
|
||||
"To force Tidy to preserve the original, literal values of all attributes, "
|
||||
"and ensure that whitespace characters within attribute values are passed "
|
||||
"through unchanged, set the literal-attributes option to \"yes\". "
|
||||
,TidyWrapAttValsLinks
|
||||
},
|
||||
{TidyWrapJste,
|
||||
|
@ -1047,6 +1100,7 @@ __attribute__((format(printf, 2, 3)))
|
|||
void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... )
|
||||
{
|
||||
va_list args;
|
||||
if (level == TidyInfo && !cfgBool(doc, TidyShowInfo)) return;
|
||||
va_start( args, msg );
|
||||
messagePos( doc, level, 0, 0, msg, args );
|
||||
va_end( args );
|
||||
|
@ -1367,14 +1421,14 @@ void TY_(ReportAccessWarning)( TidyDocImpl* doc, Node* node, uint code )
|
|||
{
|
||||
ctmbstr fmt = GetFormatFromCode(code);
|
||||
doc->badAccess |= BA_WAI;
|
||||
messageNode( doc, TidyAccess, node, fmt );
|
||||
messageNode( doc, TidyAccess, node, "%s", fmt );
|
||||
}
|
||||
|
||||
void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code )
|
||||
{
|
||||
ctmbstr fmt = GetFormatFromCode(code);
|
||||
doc->badAccess |= BA_WAI;
|
||||
messageNode( doc, TidyAccess, node, fmt );
|
||||
messageNode( doc, TidyAccess, node, "%s", fmt );
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */
|
||||
|
@ -1393,7 +1447,7 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
|
|||
switch (code)
|
||||
{
|
||||
case NESTED_QUOTATION:
|
||||
messageNode(doc, TidyWarning, rpt, fmt);
|
||||
messageNode(doc, TidyWarning, rpt, "%s", fmt);
|
||||
break;
|
||||
|
||||
case OBSOLETE_ELEMENT:
|
||||
|
@ -1401,6 +1455,7 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
|
|||
messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc);
|
||||
break;
|
||||
|
||||
case REMOVED_HTML5:
|
||||
case NESTED_EMPHASIS:
|
||||
messageNode(doc, TidyWarning, rpt, fmt, nodedesc);
|
||||
break;
|
||||
|
@ -1474,7 +1529,7 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
|
|||
case INCONSISTENT_NAMESPACE:
|
||||
case DOCTYPE_AFTER_TAGS:
|
||||
case DTYPE_NOT_UPPER_CASE:
|
||||
messageNode(doc, TidyWarning, rpt, fmt);
|
||||
messageNode(doc, TidyWarning, rpt, "%s", fmt);
|
||||
break;
|
||||
|
||||
case COERCE_TO_ENDTAG:
|
||||
|
@ -1493,7 +1548,7 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
|
|||
case ENCODING_IO_CONFLICT:
|
||||
case MISSING_DOCTYPE:
|
||||
case SPACE_PRECEDING_XMLDECL:
|
||||
messageNode(doc, TidyWarning, node, fmt);
|
||||
messageNode(doc, TidyWarning, node, "%s", fmt);
|
||||
break;
|
||||
|
||||
case TRIM_EMPTY_ELEMENT:
|
||||
|
@ -1542,7 +1597,7 @@ void TY_(ReportFatal)( TidyDocImpl* doc, Node *element, Node *node, uint code)
|
|||
{
|
||||
case SUSPECTED_MISSING_QUOTE:
|
||||
case DUPLICATE_FRAMESET:
|
||||
messageNode(doc, TidyError, rpt, fmt);
|
||||
messageNode(doc, TidyError, rpt, "%s", fmt);
|
||||
break;
|
||||
|
||||
case UNKNOWN_ELEMENT:
|
||||
|
@ -1775,11 +1830,14 @@ void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc )
|
|||
|
||||
void TY_(GeneralInfo)( TidyDocImpl* doc )
|
||||
{
|
||||
tidy_out(doc, "To learn more about HTML Tidy see http://tidy.sourceforge.net\n");
|
||||
tidy_out(doc, "Please fill bug reports and queries using the \"tracker\" on the Tidy web site.\n");
|
||||
tidy_out(doc, "Additionally, questions can be sent to html-tidy@w3.org\n");
|
||||
tidy_out(doc, "HTML and CSS specifications are available from http://www.w3.org/\n");
|
||||
tidy_out(doc, "Lobby your company to join W3C, see http://www.w3.org/Consortium\n");
|
||||
if (!cfgBool(doc, TidyShowInfo)) return;
|
||||
tidy_out(doc, "About this fork of Tidy: http://w3c.github.com/tidy-html5/\n");
|
||||
tidy_out(doc, "Bug reports and comments: https://github.com/w3c/tidy-html5/issues/\n");
|
||||
tidy_out(doc, "Or send questions and comments to html-tidy@w3.org\n");
|
||||
tidy_out(doc, "Latest HTML specification: http://dev.w3.org/html5/spec-author-view/\n");
|
||||
tidy_out(doc, "HTML language reference: http://dev.w3.org/html5/markup/\n");
|
||||
tidy_out(doc, "Validate your HTML5 documents: http://validator.w3.org/nu/\n");
|
||||
tidy_out(doc, "Lobby your company to join the W3C: http://www.w3.org/Consortium\n");
|
||||
}
|
||||
|
||||
#if SUPPORT_ACCESSIBILITY_CHECKS
|
||||
|
|
|
@ -154,7 +154,9 @@ void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
|
|||
#define MISSING_ATTRIBUTE 86
|
||||
#define WHITE_IN_URI 87
|
||||
|
||||
#define PREVIOUS_LOCATION 88 /* last */
|
||||
#define REMOVED_HTML5 88 /* this element removed from HTML5 */
|
||||
|
||||
#define PREVIOUS_LOCATION 89 /* last */
|
||||
|
||||
/* character encoding errors */
|
||||
|
||||
|
|
260
src/parser.c
260
src/parser.c
|
@ -12,6 +12,13 @@
|
|||
#include "clean.h"
|
||||
#include "tags.h"
|
||||
#include "tmbstr.h"
|
||||
#ifdef _MSC_VER
|
||||
#include "sprtf.h"
|
||||
#endif
|
||||
|
||||
#ifndef SPRTF
|
||||
#define SPRTF printf
|
||||
#endif
|
||||
|
||||
#ifdef AUTO_INPUT_ENCODING
|
||||
#include "charsets.h"
|
||||
|
@ -234,6 +241,9 @@ void TY_(InsertNodeAfterElement)(Node *element, Node *node)
|
|||
|
||||
static Bool CanPrune( TidyDocImpl* doc, Node *element )
|
||||
{
|
||||
if ( !cfgBool(doc, TidyDropEmptyElems) )
|
||||
return no;
|
||||
|
||||
if ( TY_(nodeIsText)(element) )
|
||||
return yes;
|
||||
|
||||
|
@ -278,6 +288,13 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
|
|||
if (nodeIsTEXTAREA(element))
|
||||
return no;
|
||||
|
||||
/* fix for ISSUE #7 https://github.com/w3c/tidy-html5/issues/7 */
|
||||
if (nodeIsCANVAS(element))
|
||||
return no;
|
||||
|
||||
if (nodeIsPROGRESS(element))
|
||||
return no;
|
||||
|
||||
if ( attrGetID(element) || attrGetNAME(element) )
|
||||
return no;
|
||||
|
||||
|
@ -296,6 +313,10 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
|
|||
if (nodeIsCOLGROUP(element))
|
||||
return no;
|
||||
|
||||
/* HTML5 - do NOT drop empty option if it has attributes */
|
||||
if ( nodeIsOPTION(element) && element->attributes != NULL )
|
||||
return no;
|
||||
|
||||
return yes;
|
||||
}
|
||||
|
||||
|
@ -811,13 +832,25 @@ static void AddClassNoIndent( TidyDocImpl* doc, Node *node )
|
|||
*/
|
||||
void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
||||
{
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
static int in_parse_block = 0;
|
||||
#endif
|
||||
Lexer* lexer = doc->lexer;
|
||||
Node *node;
|
||||
Bool checkstack = yes;
|
||||
uint istackbase = 0;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block++;
|
||||
SPRTF("Entering ParseBlock %d...\n",in_parse_block);
|
||||
#endif
|
||||
|
||||
if ( element->tag->model & CM_EMPTY )
|
||||
if ( element->tag->model & CM_EMPTY ) {
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlockL 1 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
if ( nodeIsFORM(element) &&
|
||||
DescendantOf(element, TidyTag_FORM) )
|
||||
|
@ -860,6 +893,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
|
||||
element->closed = yes;
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -951,6 +988,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
{
|
||||
TY_(UngetToken)( doc );
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1111,6 +1152,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
{
|
||||
TY_(UngetToken)( doc );
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 3 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1127,6 +1172,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
lexer->istackbase = istackbase;
|
||||
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 4 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1177,6 +1226,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
element->parent->tag->parser == TY_(ParseList) )
|
||||
{
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 5 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1188,6 +1241,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
if ( nodeIsDL(element->parent) )
|
||||
{
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 6 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1198,8 +1255,13 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
/* http://tidy.sf.net/issue/1316307 */
|
||||
/* In exiled mode, return so table processing can
|
||||
continue. */
|
||||
if (lexer->exiled)
|
||||
if (lexer->exiled) {
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 7 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
node = TY_(InferredTag)(doc, TidyTag_TABLE);
|
||||
}
|
||||
else if ( TY_(nodeHasCM)(element, CM_OBJECT) )
|
||||
|
@ -1209,12 +1271,20 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
TY_(PopInline)( doc, NULL );
|
||||
lexer->istackbase = istackbase;
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 8 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 9 %d...\n",in_parse_block);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1278,15 +1348,31 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
}
|
||||
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_block--;
|
||||
SPRTF("Exit ParseBlock 10 %d...\n",in_parse_block);
|
||||
#endif
|
||||
}
|
||||
|
||||
void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
||||
{
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
static int in_parse_inline = 0;
|
||||
#endif
|
||||
Lexer* lexer = doc->lexer;
|
||||
Node *node, *parent;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline++;
|
||||
SPRTF("Entering ParseInline %d...\n",in_parse_inline);
|
||||
#endif
|
||||
|
||||
if (element->tag->model & CM_EMPTY)
|
||||
if (element->tag->model & CM_EMPTY) {
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 1 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
ParseInline is used for some block level elements like H1 to H6
|
||||
|
@ -1363,6 +1449,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
|
||||
element->closed = yes;
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 2 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1384,6 +1474,7 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
&& !nodeIsSUP(node)
|
||||
&& !nodeIsQ(node)
|
||||
&& !nodeIsSPAN(node)
|
||||
&& cfgBool(doc, TidyCoerceEndTags)
|
||||
)
|
||||
{
|
||||
/* proceeds only if "node" does not have any attribute and
|
||||
|
@ -1442,7 +1533,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
|
||||
if (!(mode & Preformatted))
|
||||
TrimSpaces(doc, element);
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 3 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1529,6 +1623,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
TY_(InlineDup1)( doc, NULL, element ); /* dupe the <i>, after </b> */
|
||||
if (!(mode & Preformatted))
|
||||
TrimSpaces( doc, element );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 4 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return; /* close <i>, but will re-open it, after </b> */
|
||||
}
|
||||
}
|
||||
|
@ -1549,7 +1647,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
|
||||
if (!(mode & Preformatted))
|
||||
TrimSpaces(doc, element);
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 5 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1563,6 +1664,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
{
|
||||
TY_(UngetToken)( doc );
|
||||
TrimSpaces(doc, element);
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 6 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1585,6 +1690,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
if (!(mode & Preformatted))
|
||||
TrimSpaces(doc, element);
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 7 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1601,7 +1710,8 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
/* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */
|
||||
/* other fixes by Dave Raggett */
|
||||
/* if (node->attributes == NULL) */
|
||||
if (node->type != EndTag && node->attributes == NULL)
|
||||
if (node->type != EndTag && node->attributes == NULL
|
||||
&& cfgBool(doc, TidyCoerceEndTags) )
|
||||
{
|
||||
node->type = EndTag;
|
||||
TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG);
|
||||
|
@ -1617,6 +1727,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
if (!(mode & Preformatted))
|
||||
TrimSpaces(doc, element);
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 8 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1739,6 +1853,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
if (!(mode & Preformatted))
|
||||
TrimSpaces(doc, element);
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 9 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1754,7 +1872,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
TY_(FreeNode)( doc, node);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* HTML5 */
|
||||
if (nodeIsDATALIST(element)) {
|
||||
TY_(ConstrainVersion)( doc, ~VERS_HTML5 );
|
||||
} else
|
||||
if (!(element->tag->model & CM_OPT))
|
||||
TY_(ReportError)(doc, element, node, MISSING_ENDTAG_BEFORE);
|
||||
|
||||
|
@ -1776,6 +1897,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
{
|
||||
TY_(DiscardElement)( doc, element );
|
||||
TY_(UngetToken)( doc );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 10 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1785,6 +1910,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
if (!(mode & Preformatted))
|
||||
TrimSpaces(doc, element);
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 11 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1812,6 +1941,10 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
|
|||
if (!(element->tag->model & CM_OPT))
|
||||
TY_(ReportError)(doc, element, node, MISSING_ENDTAG_FOR);
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_inline--;
|
||||
SPRTF("Exit ParseInline 12 %d...\n",in_parse_inline);
|
||||
#endif
|
||||
}
|
||||
|
||||
void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
||||
|
@ -1824,7 +1957,7 @@ void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode)
|
|||
{
|
||||
if ( !(node->type == EndTag && node->tag == element->tag) )
|
||||
{
|
||||
TY_(ReportError)(doc, element, node, ELEMENT_NOT_EMPTY);
|
||||
/* TY_(ReportError)(doc, element, node, ELEMENT_NOT_EMPTY); */
|
||||
TY_(UngetToken)( doc );
|
||||
}
|
||||
else
|
||||
|
@ -2895,10 +3028,17 @@ void TY_(ParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
|
|||
|
||||
void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
|
||||
{
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
static int in_parse_select = 0;
|
||||
#endif
|
||||
Lexer* lexer = doc->lexer;
|
||||
Node *node;
|
||||
|
||||
lexer->insert = NULL; /* defer implicit inline start tags */
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_select++;
|
||||
SPRTF("Entering ParseSelect %d...\n",in_parse_select);
|
||||
#endif
|
||||
|
||||
while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
|
||||
{
|
||||
|
@ -2907,6 +3047,10 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
|
|||
TY_(FreeNode)( doc, node);
|
||||
field->closed = yes;
|
||||
TrimSpaces(doc, field);
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_select--;
|
||||
SPRTF("Exit ParseSelect 1 %d...\n",in_parse_select);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2917,6 +3061,7 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
|
|||
if ( node->type == StartTag &&
|
||||
( nodeIsOPTION(node) ||
|
||||
nodeIsOPTGROUP(node) ||
|
||||
nodeIsDATALIST(node) ||
|
||||
nodeIsSCRIPT(node))
|
||||
)
|
||||
{
|
||||
|
@ -2931,8 +3076,72 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
|
|||
}
|
||||
|
||||
TY_(ReportError)(doc, field, node, MISSING_ENDTAG_FOR);
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
in_parse_select--;
|
||||
SPRTF("Exit ParseSelect 2 %d...\n",in_parse_select);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* HTML5 */
|
||||
/* Parser for the content of a <datalist> element (`field`).
**
** Tokens are read in IgnoreWhitespace mode until the matching end tag
** arrives or the token stream is exhausted:
**   - the matching end tag closes `field`, trims it and returns;
**   - nodes accepted by InsertMisc() are attached by that helper;
**   - start tags for option, optgroup, datalist and script are appended
**     to `field` and parsed recursively through ParseTag();
**   - every other token is reported as DISCARDING_UNEXPECTED and freed.
** Running out of tokens is reported as MISSING_ENDTAG_FOR.
**
** The `mode` argument is unused.  The in_parse_datalist counter and the
** SPRTF traces are compiled only for MSVC builds without NDEBUG.
*/
void TY_(ParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
{
#if !defined(NDEBUG) && defined(_MSC_VER)
    static int in_parse_datalist = 0;
#endif
    Node *node;

    /* defer implicit inline start tags */
    doc->lexer->insert = NULL;

#if !defined(NDEBUG) && defined(_MSC_VER)
    ++in_parse_datalist;
    SPRTF("Entering ParseDatalist %d...\n",in_parse_datalist);
#endif

    for (;;)
    {
        node = TY_(GetToken)(doc, IgnoreWhitespace);
        if (node == NULL)
            break;

        /* our own end tag: finish the element and leave */
        if (node->type == EndTag && node->tag == field->tag)
        {
            TY_(FreeNode)( doc, node);
            field->closed = yes;
            TrimSpaces(doc, field);
#if !defined(NDEBUG) && defined(_MSC_VER)
            --in_parse_datalist;
            SPRTF("Exit ParseDatalist 1 %d...\n",in_parse_datalist);
#endif
            return;
        }

        /* comments and similar nodes are handled by InsertMisc */
        if (InsertMisc(field, node))
            continue;

        /* anything that is not one of the permitted start tags is dropped */
        if ( node->type != StartTag ||
             !( nodeIsOPTION(node)   ||
                nodeIsOPTGROUP(node) ||
                nodeIsDATALIST(node) ||
                nodeIsSCRIPT(node) ) )
        {
            TY_(ReportError)(doc, field, node, DISCARDING_UNEXPECTED);
            TY_(FreeNode)( doc, node);
            continue;
        }

        TY_(InsertNodeAtEnd)(field, node);
        ParseTag(doc, node, IgnoreWhitespace);
    }

    /* token stream ended before the end tag; node is NULL at this point */
    TY_(ReportError)(doc, field, node, MISSING_ENDTAG_FOR);
#if !defined(NDEBUG) && defined(_MSC_VER)
    --in_parse_datalist;
    SPRTF("Exit ParseDatalist 2 %d...\n",in_parse_datalist);
#endif
}
|
||||
|
||||
|
||||
|
||||
|
||||
void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
|
||||
{
|
||||
Lexer* lexer = doc->lexer;
|
||||
|
@ -3006,7 +3215,8 @@ void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode
|
|||
Node *node;
|
||||
while ((node = TY_(GetToken)(doc, MixedContent)) != NULL)
|
||||
{
|
||||
if (node->tag == title->tag && node->type == StartTag)
|
||||
if (node->tag == title->tag && node->type == StartTag
|
||||
&& cfgBool(doc, TidyCoerceEndTags) )
|
||||
{
|
||||
TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG);
|
||||
node->type = EndTag;
|
||||
|
@ -3129,6 +3339,9 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
|
|||
int HasTitle = 0;
|
||||
int HasBase = 0;
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Enter ParseHead...\n");
|
||||
#endif
|
||||
while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
|
||||
{
|
||||
if (node->tag == head->tag && node->type == EndTag)
|
||||
|
@ -3214,10 +3427,6 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
|
|||
head ?
|
||||
TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
|
||||
}
|
||||
else if ( nodeIsNOSCRIPT(node) )
|
||||
{
|
||||
TY_(ReportError)(doc, head, node, TAG_NOT_ALLOWED_IN);
|
||||
}
|
||||
|
||||
#ifdef AUTO_INPUT_ENCODING
|
||||
else if (nodeIsMETA(node))
|
||||
|
@ -3271,6 +3480,9 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
|
|||
TY_(ReportError)(doc, head, node, DISCARDING_UNEXPECTED);
|
||||
TY_(FreeNode)( doc, node);
|
||||
}
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Exit ParseHead 1...\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
|
||||
|
@ -3283,6 +3495,9 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
|
|||
checkstack = yes;
|
||||
|
||||
TY_(BumpObject)( doc, body->parent );
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Enter ParseBody...\n");
|
||||
#endif
|
||||
|
||||
while ((node = TY_(GetToken)(doc, mode)) != NULL)
|
||||
{
|
||||
|
@ -3510,7 +3725,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
|
|||
|
||||
if (TY_(nodeIsElement)(node))
|
||||
{
|
||||
if ( TY_(nodeHasCM)(node, CM_INLINE) && !TY_(nodeHasCM)(node, CM_MIXED) )
|
||||
if ( TY_(nodeHasCM)(node, CM_INLINE) )
|
||||
{
|
||||
/* HTML4 strict doesn't allow inline content here */
|
||||
/* but HTML2 does allow img elements as children of body */
|
||||
|
@ -3547,6 +3762,9 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
|
|||
TY_(ReportError)(doc, body, node, DISCARDING_UNEXPECTED);
|
||||
TY_(FreeNode)( doc, node);
|
||||
}
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Exit ParseBody 1...\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
|
||||
|
@ -3735,6 +3953,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
|
|||
Node *frameset = NULL;
|
||||
Node *noframes = NULL;
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Entering ParseHTML...\n");
|
||||
#endif
|
||||
TY_(SetOptionBool)( doc, TidyXmlTags, no );
|
||||
|
||||
for (;;)
|
||||
|
@ -3790,7 +4011,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
|
|||
TY_(InsertNodeAtEnd)(html, node);
|
||||
TY_(ParseBody)(doc, node, mode);
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Exit ParseHTML 1...\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -3956,6 +4179,9 @@ void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
|
|||
|
||||
TY_(InsertNodeAtEnd)(html, node);
|
||||
ParseTag(doc, node, mode);
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("Exit ParseHTML 2...\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static Bool nodeCMIsOnlyInline( Node* node )
|
||||
|
@ -4048,7 +4274,9 @@ static void ReplaceObsoleteElements(TidyDocImpl* doc, Node* node)
|
|||
{
|
||||
next = node->next;
|
||||
|
||||
if (nodeIsDIR(node) || nodeIsMENU(node))
|
||||
/* if (nodeIsDIR(node) || nodeIsMENU(node)) */
|
||||
/* HTML5 - <menu ... > is no longer obsolete */
|
||||
if (nodeIsDIR(node))
|
||||
TY_(CoerceNode)(doc, node, TidyTag_UL, yes, yes);
|
||||
|
||||
if (nodeIsXMP(node) || nodeIsLISTING(node) ||
|
||||
|
|
|
@ -1152,7 +1152,7 @@ static void PPrintAttribute( TidyDocImpl* doc, uint indent,
|
|||
{
|
||||
if ( TY_(IsScript)(doc, name) )
|
||||
wrappable = cfgBool( doc, TidyWrapScriptlets );
|
||||
else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr)) && wrapAttrs )
|
||||
else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr) || attrIsTITLE(attr)) && wrapAttrs )
|
||||
wrappable = yes;
|
||||
}
|
||||
|
||||
|
@ -2083,7 +2083,8 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
|
|||
{
|
||||
Bool indcont = ( cfgAutoBool(doc, TidyIndentContent) != TidyNoState );
|
||||
Bool indsmart = ( cfgAutoBool(doc, TidyIndentContent) == TidyAutoState );
|
||||
Bool hideend = cfgBool( doc, TidyHideEndTags );
|
||||
Bool hideend = cfgBool( doc, TidyHideEndTags ) ||
|
||||
cfgBool( doc, TidyOmitOptionalTags );
|
||||
Bool classic = cfgBool( doc, TidyVertSpace );
|
||||
uint contentIndent = indent;
|
||||
|
||||
|
|
|
@ -269,6 +269,11 @@ Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod )
|
|||
{ return nodeIsMENU( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
|
||||
/* HTML5 */
|
||||
/* Public wrapper: converts the opaque node handle with tidyNodeToImpl()
** and returns the result of the nodeIsDATALIST() test on it.
*/
Bool TIDY_CALL tidyNodeIsDATALIST( TidyNode tnod )
{
    return nodeIsDATALIST( tidyNodeToImpl( tnod ) );
}
|
||||
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
|
|
174
src/tags.c
174
src/tags.c
|
@ -17,11 +17,7 @@ static CheckAttribs CheckLINK;
|
|||
static CheckAttribs CheckAREA;
|
||||
static CheckAttribs CheckTABLE;
|
||||
static CheckAttribs CheckCaption;
|
||||
static CheckAttribs CheckSCRIPT;
|
||||
static CheckAttribs CheckSTYLE;
|
||||
static CheckAttribs CheckHTML;
|
||||
static CheckAttribs CheckFORM;
|
||||
static CheckAttribs CheckMETA;
|
||||
|
||||
#define VERS_ELEM_A (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50)
|
||||
#define VERS_ELEM_ABBR (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50)
|
||||
|
@ -128,16 +124,23 @@ static CheckAttribs CheckMETA;
|
|||
#define VERS_ELEM_ARTICLE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_ASIDE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_AUDIO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_BDI (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_CANVAS (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_COMMAND (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_DATALIST (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_DETAILS (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_DIALOG (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_EMBED (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_FIGCAPTION (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_FIGURE (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_FOOTER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_HEADER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_HGROUP (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_KEYGEN (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_MAIN (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_MARK (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_MENUITEM (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_KEYGEN (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_METER (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_NAV (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_OUTPUT (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
|
@ -148,16 +151,17 @@ static CheckAttribs CheckMETA;
|
|||
#define VERS_ELEM_TIME (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_TRACK (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_VIDEO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
#define VERS_ELEM_WBR (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
|
||||
|
||||
static const Dict tag_defs[] =
|
||||
{
|
||||
{ TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL },
|
||||
|
||||
/* W3C defined elements */
|
||||
{ TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseInline), NULL },
|
||||
{ TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_APPLET, "applet", VERS_ELEM_APPLET, &TY_(W3CAttrsFor_APPLET)[0], (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_AREA, "area", VERS_ELEM_AREA, &TY_(W3CAttrsFor_AREA)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), CheckAREA },
|
||||
{ TidyTag_B, "b", VERS_ELEM_B, &TY_(W3CAttrsFor_B)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
|
@ -185,7 +189,8 @@ static const Dict tag_defs[] =
|
|||
{ TidyTag_EM, "em", VERS_ELEM_EM, &TY_(W3CAttrsFor_EM)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_FIELDSET, "fieldset", VERS_ELEM_FIELDSET, &TY_(W3CAttrsFor_FIELDSET)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_FONT, "font", VERS_ELEM_FONT, &TY_(W3CAttrsFor_FONT)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_FORM, "form", VERS_ELEM_FORM, &TY_(W3CAttrsFor_FORM)[0], (CM_BLOCK), TY_(ParseBlock), CheckFORM },
|
||||
/* HTML5 forms add several new elements and attributes - datalist, keygen, output */
|
||||
{ TidyTag_FORM, "form", VERS_ELEM_FORM, &TY_(W3CAttrsFor_FORM)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_FRAME, "frame", VERS_ELEM_FRAME, &TY_(W3CAttrsFor_FRAME)[0], (CM_FRAMES|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_FRAMESET, "frameset", VERS_ELEM_FRAMESET, &TY_(W3CAttrsFor_FRAMESET)[0], (CM_HTML|CM_FRAMES), TY_(ParseFrameSet), NULL },
|
||||
{ TidyTag_H1, "h1", VERS_ELEM_H1, &TY_(W3CAttrsFor_H1)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
|
||||
|
@ -207,13 +212,13 @@ static const Dict tag_defs[] =
|
|||
{ TidyTag_LABEL, "label", VERS_ELEM_LABEL, &TY_(W3CAttrsFor_LABEL)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_LEGEND, "legend", VERS_ELEM_LEGEND, &TY_(W3CAttrsFor_LEGEND)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_LI, "li", VERS_ELEM_LI, &TY_(W3CAttrsFor_LI)[0], (CM_LIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_LINK, "link", VERS_ELEM_LINK, &TY_(W3CAttrsFor_LINK)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckLINK },
|
||||
{ TidyTag_LINK, "link", VERS_ELEM_LINK, &TY_(W3CAttrsFor_LINK)[0], (CM_HEAD|CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), CheckLINK },
|
||||
{ TidyTag_LISTING, "listing", VERS_ELEM_LISTING, &TY_(W3CAttrsFor_LISTING)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL },
|
||||
{ TidyTag_MAP, "map", VERS_ELEM_MAP, &TY_(W3CAttrsFor_MAP)[0], (CM_INLINE), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL },
|
||||
{ TidyTag_META, "meta", VERS_ELEM_META, &TY_(W3CAttrsFor_META)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckMETA },
|
||||
// { TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL },
|
||||
{ TidyTag_META, "meta", VERS_ELEM_META, &TY_(W3CAttrsFor_META)[0], (CM_HEAD|CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_NOFRAMES, "noframes", VERS_ELEM_NOFRAMES, &TY_(W3CAttrsFor_NOFRAMES)[0], (CM_BLOCK|CM_FRAMES), TY_(ParseNoFrames), NULL },
|
||||
{ TidyTag_NOSCRIPT, "noscript", VERS_ELEM_NOSCRIPT, &TY_(W3CAttrsFor_NOSCRIPT)[0], (CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_NOSCRIPT, "noscript", VERS_ELEM_NOSCRIPT, &TY_(W3CAttrsFor_NOSCRIPT)[0], (CM_HEAD|CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_OBJECT, "object", VERS_ELEM_OBJECT, &TY_(W3CAttrsFor_OBJECT)[0], (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_OL, "ol", VERS_ELEM_OL, &TY_(W3CAttrsFor_OL)[0], (CM_BLOCK), TY_(ParseList), NULL },
|
||||
{ TidyTag_OPTGROUP, "optgroup", VERS_ELEM_OPTGROUP, &TY_(W3CAttrsFor_OPTGROUP)[0], (CM_FIELD|CM_OPT), TY_(ParseOptGroup), NULL },
|
||||
|
@ -231,13 +236,13 @@ static const Dict tag_defs[] =
|
|||
{ TidyTag_RUBY, "ruby", VERS_ELEM_RUBY, &TY_(W3CAttrsFor_RUBY)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_S, "s", VERS_ELEM_S, &TY_(W3CAttrsFor_S)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_SAMP, "samp", VERS_ELEM_SAMP, &TY_(W3CAttrsFor_SAMP)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_SCRIPT, "script", VERS_ELEM_SCRIPT, &TY_(W3CAttrsFor_SCRIPT)[0], (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), CheckSCRIPT },
|
||||
{ TidyTag_SCRIPT, "script", VERS_ELEM_SCRIPT, &TY_(W3CAttrsFor_SCRIPT)[0], (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), NULL },
|
||||
{ TidyTag_SELECT, "select", VERS_ELEM_SELECT, &TY_(W3CAttrsFor_SELECT)[0], (CM_INLINE|CM_FIELD), TY_(ParseSelect), NULL },
|
||||
{ TidyTag_SMALL, "small", VERS_ELEM_SMALL, &TY_(W3CAttrsFor_SMALL)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_SPAN, "span", VERS_ELEM_SPAN, &TY_(W3CAttrsFor_SPAN)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_STRIKE, "strike", VERS_ELEM_STRIKE, &TY_(W3CAttrsFor_STRIKE)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_STRONG, "strong", VERS_ELEM_STRONG, &TY_(W3CAttrsFor_STRONG)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_STYLE, "style", VERS_ELEM_STYLE, &TY_(W3CAttrsFor_STYLE)[0], (CM_HEAD), TY_(ParseScript), CheckSTYLE },
|
||||
{ TidyTag_STYLE, "style", VERS_ELEM_STYLE, &TY_(W3CAttrsFor_STYLE)[0], (CM_HEAD|CM_BLOCK), TY_(ParseScript), NULL },
|
||||
{ TidyTag_SUB, "sub", VERS_ELEM_SUB, &TY_(W3CAttrsFor_SUB)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_SUP, "sup", VERS_ELEM_SUP, &TY_(W3CAttrsFor_SUP)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_TABLE, "table", VERS_ELEM_TABLE, &TY_(W3CAttrsFor_TABLE)[0], (CM_BLOCK), TY_(ParseTableTag), CheckTABLE },
|
||||
|
@ -261,9 +266,7 @@ static const Dict tag_defs[] =
|
|||
{ TidyTag_BGSOUND, "bgsound", VERS_MICROSOFT, NULL, (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_BLINK, "blink", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_COMMENT, "comment", VERS_MICROSOFT, NULL, (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_EMBED, "embed", VERS_NETSCAPE, NULL, (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_ILAYER, "ilayer", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_KEYGEN, "keygen", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_LAYER, "layer", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_MARQUEE, "marquee", VERS_MICROSOFT, NULL, (CM_INLINE|CM_OPT), TY_(ParseInline), NULL },
|
||||
{ TidyTag_MULTICOL, "multicol", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
|
@ -274,32 +277,40 @@ static const Dict tag_defs[] =
|
|||
{ TidyTag_SERVER, "server", VERS_NETSCAPE, NULL, (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), NULL },
|
||||
{ TidyTag_SERVLET, "servlet", VERS_SUN, NULL, (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_SPACER, "spacer", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_WBR, "wbr", VERS_PROPRIETARY, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
|
||||
/* "HTML5" */
|
||||
/* HTML5 */
|
||||
{ TidyTag_ARTICLE, "article", VERS_ELEM_ARTICLE, &TY_(W3CAttrsFor_ARTICLE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_ASIDE, "aside", VERS_ELEM_ASIDE, &TY_(W3CAttrsFor_ASIDE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_AUDIO, "audio", VERS_ELEM_AUDIO, &TY_(W3CAttrsFor_AUDIO)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_AUDIO, "audio", VERS_ELEM_AUDIO, &TY_(W3CAttrsFor_AUDIO)[0], (CM_BLOCK|CM_INLINE), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_BDI, "bdi", VERS_ELEM_BDI, &TY_(W3CAttrsFor_BDI)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_CANVAS, "canvas", VERS_ELEM_CANVAS, &TY_(W3CAttrsFor_CANVAS)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_COMMAND, "command", VERS_ELEM_COMMAND, &TY_(W3CAttrsFor_COMMAND)[0], (CM_HEAD|CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_DATALIST, "datalist", VERS_ELEM_DATALIST, &TY_(W3CAttrsFor_DATALIST)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_DATALIST, "datalist", VERS_ELEM_DATALIST, &TY_(W3CAttrsFor_DATALIST)[0], (CM_INLINE|CM_FIELD), TY_(ParseDatalist), NULL },
|
||||
//{ TidyTag_DATALIST, "datalist", VERS_ELEM_DATALIST, &TY_(W3CAttrsFor_DATALIST)[0], (CM_FIELD), TY_(ParseInline), NULL },
|
||||
{ TidyTag_DETAILS, "details", VERS_ELEM_DETAILS, &TY_(W3CAttrsFor_DETAILS)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_DIALOG, "dialog", VERS_ELEM_DIALOG, &TY_(W3CAttrsFor_DIALOG)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_EMBED, "embed", VERS_ELEM_EMBED, &TY_(W3CAttrsFor_EMBED)[0], (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_FIGCAPTION, "figcaption", VERS_ELEM_FIGCAPTION, &TY_(W3CAttrsFor_FIGCAPTION)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_FIGURE, "figure", VERS_ELEM_FIGURE, &TY_(W3CAttrsFor_FIGURE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_FOOTER, "footer", VERS_ELEM_FOOTER, &TY_(W3CAttrsFor_FOOTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_HEADER, "header", VERS_ELEM_HEADER, &TY_(W3CAttrsFor_HEADER)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_HGROUP, "hgroup", VERS_ELEM_HGROUP, &TY_(W3CAttrsFor_HGROUP)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_KEYGEN, "keygen", VERS_ELEM_KEYGEN, &TY_(W3CAttrsFor_KEYGEN)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_MAIN, "main", VERS_ELEM_MAIN, &TY_(W3CAttrsFor_MAIN)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_MARK, "mark", VERS_ELEM_MARK, &TY_(W3CAttrsFor_MARK)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_MENUITEM, "menuitem", VERS_ELEM_MENUITEM, &TY_(W3CAttrsFor_MENUITEM)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseInline), NULL },
|
||||
{ TidyTag_METER, "meter", VERS_ELEM_METER, &TY_(W3CAttrsFor_METER)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_NAV, "nav", VERS_ELEM_NAV, &TY_(W3CAttrsFor_NAV)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_OUTPUT, "output", VERS_ELEM_OUTPUT, &TY_(W3CAttrsFor_OUTPUT)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_PROGRESS, "progress", VERS_ELEM_PROGRESS, &TY_(W3CAttrsFor_PROGRESS)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_SECTION, "section", VERS_ELEM_SECTION, &TY_(W3CAttrsFor_SECTION)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_SOURCE, "source", VERS_ELEM_SOURCE, &TY_(W3CAttrsFor_SOURCE)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_SOURCE, "source", VERS_ELEM_SOURCE, &TY_(W3CAttrsFor_SOURCE)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_SUMMARY, "summary", VERS_ELEM_SUMMARY, &TY_(W3CAttrsFor_SUMMARY)[0], (CM_BLOCK), TY_(ParseInline), NULL },
|
||||
{ TidyTag_TIME, "time", VERS_ELEM_TIME, &TY_(W3CAttrsFor_TIME)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_TRACK, "track", VERS_ELEM_TRACK, &TY_(W3CAttrsFor_TRACK)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
{ TidyTag_VIDEO, "video", VERS_ELEM_VIDEO, &TY_(W3CAttrsFor_VIDEO)[0], (CM_INLINE), TY_(ParseInline), NULL },
|
||||
{ TidyTag_TRACK, "track", VERS_ELEM_TRACK, &TY_(W3CAttrsFor_TRACK)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_VIDEO, "video", VERS_ELEM_VIDEO, &TY_(W3CAttrsFor_VIDEO)[0], (CM_BLOCK|CM_INLINE), TY_(ParseBlock), NULL },
|
||||
{ TidyTag_WBR, "wbr", VERS_ELEM_WBR, &TY_(W3CAttrsFor_WBR)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
|
||||
|
||||
/* this must be the final entry */
|
||||
{ (TidyTagId)0, NULL, 0, NULL, (0), NULL, NULL }
|
||||
|
@ -760,21 +771,9 @@ void CheckAREA( TidyDocImpl* doc, Node *node )
|
|||
void CheckTABLE( TidyDocImpl* doc, Node *node )
|
||||
{
|
||||
AttVal* attval;
|
||||
Bool HasSummary = TY_(AttrGetById)(node, TidyAttr_SUMMARY) != NULL;
|
||||
|
||||
TY_(CheckAttributes)(doc, node);
|
||||
|
||||
/* a missing summary attribute is bad accessibility, no matter
|
||||
what HTML version is involved; a document without is valid */
|
||||
if (cfg(doc, TidyAccessibilityCheckLevel) == 0)
|
||||
{
|
||||
if (!HasSummary)
|
||||
{
|
||||
doc->badAccess |= BA_MISSING_SUMMARY;
|
||||
TY_(ReportMissingAttr)( doc, node, "summary");
|
||||
}
|
||||
}
|
||||
|
||||
/* convert <table border> to <table border="1"> */
|
||||
if ( cfgBool(doc, TidyXmlOut) && (attval = TY_(AttrGetById)(node, TidyAttr_BORDER)) )
|
||||
{
|
||||
|
@ -783,115 +782,24 @@ void CheckTABLE( TidyDocImpl* doc, Node *node )
|
|||
}
|
||||
}
|
||||
|
||||
/* add missing type attribute when appropriate */
|
||||
void CheckSCRIPT( TidyDocImpl* doc, Node *node )
|
||||
{
|
||||
AttVal *lang, *type;
|
||||
char buf[16];
|
||||
|
||||
TY_(CheckAttributes)(doc, node);
|
||||
|
||||
lang = TY_(AttrGetById)(node, TidyAttr_LANGUAGE);
|
||||
type = TY_(AttrGetById)(node, TidyAttr_TYPE);
|
||||
|
||||
if (!type)
|
||||
{
|
||||
/* check for javascript */
|
||||
if (lang)
|
||||
{
|
||||
/* Test #696799. lang->value can be NULL. */
|
||||
buf[0] = '\0';
|
||||
TY_(tmbstrncpy)(buf, lang->value, sizeof(buf));
|
||||
buf[10] = '\0';
|
||||
|
||||
if (TY_(tmbstrncasecmp)(buf, "javascript", 10) == 0 ||
|
||||
TY_(tmbstrncasecmp)(buf, "jscript", 7) == 0)
|
||||
{
|
||||
TY_(AddAttribute)(doc, node, "type", "text/javascript");
|
||||
}
|
||||
else if (TY_(tmbstrcasecmp)(buf, "vbscript") == 0)
|
||||
{
|
||||
/* per Randy Waki 8/6/01 */
|
||||
TY_(AddAttribute)(doc, node, "type", "text/vbscript");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
TY_(AddAttribute)(doc, node, "type", "text/javascript");
|
||||
}
|
||||
|
||||
type = TY_(AttrGetById)(node, TidyAttr_TYPE);
|
||||
|
||||
if (type != NULL)
|
||||
{
|
||||
TY_(ReportAttrError)(doc, node, type, INSERTING_ATTRIBUTE);
|
||||
}
|
||||
else
|
||||
{
|
||||
TY_(ReportMissingAttr)(doc, node, "type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* add missing type attribute when appropriate */
|
||||
void CheckSTYLE( TidyDocImpl* doc, Node *node )
|
||||
{
|
||||
AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
|
||||
|
||||
TY_(CheckAttributes)( doc, node );
|
||||
|
||||
if ( !type || !type->value || !TY_(tmbstrlen)(type->value) )
|
||||
{
|
||||
type = TY_(RepairAttrValue)(doc, node, "type", "text/css");
|
||||
TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
|
||||
}
|
||||
}
|
||||
|
||||
/* add missing type attribute when appropriate */
|
||||
/* report missing href attribute; report missing rel attribute */
|
||||
void CheckLINK( TidyDocImpl* doc, Node *node )
|
||||
{
|
||||
AttVal *rel = TY_(AttrGetById)(node, TidyAttr_REL);
|
||||
Bool HasHref = TY_(AttrGetById)(node, TidyAttr_HREF) != NULL;
|
||||
Bool HasRel = TY_(AttrGetById)(node, TidyAttr_REL) != NULL;
|
||||
Bool HasItemprop = TY_(AttrGetById)(node, TidyAttr_ITEMPROP) != NULL;
|
||||
|
||||
TY_(CheckAttributes)( doc, node );
|
||||
|
||||
/* todo: <link rel="alternate stylesheet"> */
|
||||
if (AttrValueIs(rel, "stylesheet"))
|
||||
if (!HasHref)
|
||||
{
|
||||
AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
|
||||
if (!type)
|
||||
{
|
||||
TY_(AddAttribute)( doc, node, "type", "text/css" );
|
||||
type = TY_(AttrGetById)(node, TidyAttr_TYPE);
|
||||
TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
|
||||
}
|
||||
}
|
||||
TY_(ReportMissingAttr)( doc, node, "href" );
|
||||
}
|
||||
|
||||
/* reports missing action attribute */
|
||||
void CheckFORM( TidyDocImpl* doc, Node *node )
|
||||
if (!HasItemprop && !HasRel)
|
||||
{
|
||||
AttVal *action = TY_(AttrGetById)(node, TidyAttr_ACTION);
|
||||
|
||||
TY_(CheckAttributes)(doc, node);
|
||||
|
||||
if (!action)
|
||||
TY_(ReportMissingAttr)(doc, node, "action");
|
||||
TY_(ReportMissingAttr)( doc, node, "rel" );
|
||||
}
|
||||
|
||||
/* reports missing content attribute */
|
||||
void CheckMETA( TidyDocImpl* doc, Node *node )
|
||||
{
|
||||
AttVal *content = TY_(AttrGetById)(node, TidyAttr_CONTENT);
|
||||
|
||||
TY_(CheckAttributes)(doc, node);
|
||||
|
||||
if (!content)
|
||||
TY_(ReportMissingAttr)( doc, node, "content" );
|
||||
/* name or http-equiv attribute must also be set */
|
||||
}
|
||||
|
||||
|
||||
Bool TY_(nodeIsText)( Node* node )
|
||||
{
|
||||
return ( node && node->type == TextNode );
|
||||
|
|
|
@ -110,6 +110,7 @@ Parser TY_(ParseRow);
|
|||
Parser TY_(ParseSelect);
|
||||
Parser TY_(ParseOptGroup);
|
||||
Parser TY_(ParseText);
|
||||
Parser TY_(ParseDatalist);
|
||||
|
||||
CheckAttribs TY_(CheckAttributes);
|
||||
|
||||
|
@ -224,6 +225,11 @@ uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */
|
|||
#define nodeIsU( node ) TagIsId( node, TidyTag_U )
|
||||
#define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU )
|
||||
#define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON )
|
||||
#define nodeIsCANVAS( node ) TagIsId( node, TidyTag_CANVAS )
|
||||
#define nodeIsPROGRESS( node ) TagIsId( node, TidyTag_PROGRESS )
|
||||
|
||||
/* HTML5 */
|
||||
#define nodeIsDATALIST( node ) TagIsId( node, TidyTag_DATALIST )
|
||||
|
||||
|
||||
#endif /* __TAGS_H__ */
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "tidy-int.h"
|
||||
#include "parser.h"
|
||||
#include "clean.h"
|
||||
#include "gdoc.h"
|
||||
#include "config.h"
|
||||
#include "message.h"
|
||||
#include "pprint.h"
|
||||
|
@ -1227,11 +1228,63 @@ int tidyDocRunDiagnostics( TidyDocImpl* doc )
|
|||
return tidyDocStatus( doc );
|
||||
}
|
||||
|
||||
static struct _html5Info
|
||||
{
|
||||
const char *tag;
|
||||
uint id;
|
||||
} const html5Info[] = {
|
||||
{"acronym", TidyTag_ACRONYM},
|
||||
{"applet", TidyTag_APPLET },
|
||||
{"basefont",TidyTag_BASEFONT },
|
||||
{ "big", TidyTag_BIG },
|
||||
{ "center", TidyTag_CENTER },
|
||||
{ "dir", TidyTag_DIR },
|
||||
{ "font", TidyTag_FONT },
|
||||
{ "frame", TidyTag_FRAME},
|
||||
{ "frameset", TidyTag_FRAMESET},
|
||||
{ "noframes", TidyTag_NOFRAMES },
|
||||
{ "strike", TidyTag_STRIKE },
|
||||
{ "tt", TidyTag_TT },
|
||||
{ 0, 0 }
|
||||
};
|
||||
/* Looks up tag id `tid` in the html5Info table.
** The scan stops at the entry whose tag pointer is null.
** Returns yes when an entry with a matching id is found, no otherwise.
*/
Bool inRemovedInfo( uint tid )
{
    uint ix = 0;

    while ( html5Info[ix].tag != NULL )
    {
        if ( html5Info[ix].id == tid )
            return yes;
        ++ix;
    }
    return no;
}
|
||||
|
||||
/* Walks `node` and each of its following siblings, recursing into every
** node's content, and issues a REMOVED_HTML5 warning for each element
** whose tag either carries none of the VERS_HTML5 version bits or is
** listed by inRemovedInfo().
**
** doc  - document the warnings are reported against
** node - first node of the sibling chain to check; may be NULL
*/
void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
{
    for ( ; node != NULL; node = node->next )
    {
        if ( TY_(nodeIsElement)(node) && node->tag != NULL )
        {
            /* The mask has to be applied before the negation: unary `!`
            ** binds tighter than `&`, so `!versions & VERS_HTML5` would
            ** test the negated value against the mask instead.
            */
            if ( !(node->tag->versions & VERS_HTML5) ||
                 inRemovedInfo(node->tag->id) )
            {
                /* issue warning */
                TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
            }
        }

        if ( node->content != NULL )
            TY_(CheckHTML5)( doc, node->content );
    }
}
|
||||
|
||||
int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
||||
{
|
||||
Bool word2K = cfgBool( doc, TidyWord2000 );
|
||||
Bool logical = cfgBool( doc, TidyLogicalEmphasis );
|
||||
Bool clean = cfgBool( doc, TidyMakeClean );
|
||||
Bool gdoc = cfgBool( doc, TidyGDocClean );
|
||||
Bool dropFont = cfgBool( doc, TidyDropFontTags );
|
||||
Bool htmlOut = cfgBool( doc, TidyHtmlOut );
|
||||
Bool xmlOut = cfgBool( doc, TidyXmlOut );
|
||||
|
@ -1240,13 +1293,16 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
|||
Bool tidyMark = cfgBool( doc, TidyMark );
|
||||
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
|
||||
Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
|
||||
Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
|
||||
ctmbstr sdef = NULL;
|
||||
Node* node;
|
||||
|
||||
if (tidyXmlTags)
|
||||
return tidyDocStatus( doc );
|
||||
|
||||
/* simplifies <b><b> ... </b> ...</b> etc. */
|
||||
TY_(NestedEmphasis)( doc, &doc->root );
|
||||
if ( mergeEmphasis )
|
||||
TY_(NestedEmphasis)( doc, &doc->root );
|
||||
|
||||
/* cleans up <dir>indented text</dir> etc. */
|
||||
TY_(List2BQ)( doc, &doc->root );
|
||||
|
@ -1270,6 +1326,10 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
|||
if ( clean || dropFont )
|
||||
TY_(CleanDocument)( doc );
|
||||
|
||||
/* clean up html exported by Google Docs */
|
||||
if ( gdoc )
|
||||
TY_(CleanGoogleDocument)( doc );
|
||||
|
||||
/* Move terminating <br /> tags from out of paragraphs */
|
||||
/*! Do we want to do this for all block-level elements? */
|
||||
|
||||
|
@ -1291,6 +1351,12 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
|||
|
||||
/* remember given doctype for reporting */
|
||||
node = TY_(FindDocType)(doc);
|
||||
sdef = tidyOptGetValue((TidyDoc)doc, TidyDoctype );
|
||||
if (!sdef)
|
||||
sdef = tidyOptGetCurrPick((TidyDoc) doc, TidyDoctypeMode );
|
||||
if (sdef && (strcmp(sdef,"html5") == 0)) {
|
||||
TY_(CheckHTML5)( doc, &doc->root );
|
||||
}
|
||||
if (node)
|
||||
{
|
||||
AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
|
||||
|
@ -1388,7 +1454,6 @@ int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
|
|||
{
|
||||
/* noop */
|
||||
TY_(DropFontElements)(doc, &doc->root, NULL);
|
||||
TY_(WbrToSpace)(doc, &doc->root);
|
||||
}
|
||||
|
||||
if ((makeClean && asciiChars) || makeBare)
|
||||
|
|
Loading…
Reference in a new issue