Merge pull request #372 from htacg/attrdict_phase2

Attrdict phase2 - enforce strict tags and attributes
This commit is contained in:
Jim Derry 2016-02-16 11:12:32 +08:00
commit 468cc02cf3
11 changed files with 347 additions and 144 deletions

View file

@ -479,11 +479,12 @@ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id )
return VERS_UNKNOWN; return VERS_UNKNOWN;
} }
/* returns true if the element is a W3C defined element */ /* returns true if the element is a W3C defined element
/* but the element/attribute combination is not. We're */ * but the element/attribute combination is not. We're
/* only defining as "proprietary" items that are not in */ * only defining as "proprietary" items that are not in
/* the element's AttrVersion structure. */ * the element's AttrVersion structure.
static Bool AttributeIsProprietary(Node* node, AttVal* attval) */
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval)
{ {
if (!node || !attval) if (!node || !attval)
return no; return no;
@ -500,6 +501,34 @@ static Bool AttributeIsProprietary(Node* node, AttVal* attval)
return yes; return yes;
} }
/* Returns yes when the attribute is not permitted on its element in the
 * HTML version of the document, and no otherwise.
 *
 * The version used for the comparison is the lexer's versionEmitted, or
 * the lexer's doctype when no version has been emitted yet (value 0).
 *
 * The result is no, meaning "nothing to report", when:
 *  - node, attval, doc or doc->lexer is NULL;
 *  - the node has no tag dictionary entry;
 *  - none of the tag's version bits fall within VERS_ALL (the element is
 *    not a W3C defined element, so it is not a version mismatch);
 *  - the version bits AttributeVersions() reports for this
 *    element/attribute pair include the document's version.
 */
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc)
{
    uint doctype;

    /* Guard every pointer that is dereferenced below, including the
     * document and its lexer, not just the node and the attribute. */
    if (!node || !attval || !doc || !doc->lexer)
        return no;

    if (!node->tag)
        return no;

    if (!(node->tag->versions & VERS_ALL))
        return no;

    /* Prefer the version actually being emitted; fall back to the
     * declared doctype when nothing has been emitted yet. */
    doctype = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;

    if (AttributeVersions(node, attval) & doctype)
        return no;

    return yes;
}
/* used by CheckColor() */ /* used by CheckColor() */
struct _colors struct _colors
{ {
@ -1358,14 +1387,6 @@ const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attv
attribute->attrchk( doc, node, attval ); attribute->attrchk( doc, node, attval );
} }
if (AttributeIsProprietary(node, attval))
{
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
if (cfgBool(doc, TidyDropPropAttrs))
TY_(RemoveAttribute)( doc, node, attval );
}
return attribute; return attribute;
} }

View file

@ -147,6 +147,10 @@ AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id );
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id ); uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval);
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc);
/* 0 == TidyAttr_UNKNOWN */ /* 0 == TidyAttr_UNKNOWN */
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN) #define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN)
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == atid)) #define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == atid))

View file

@ -1917,6 +1917,7 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
/* used to a list from a sequence of bulletted p's */ /* used to a list from a sequence of bulletted p's */
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
Node* list = NULL; Node* list = NULL;
AttVal *next_attr, *attval;
while ( node ) while ( node )
{ {
@ -1928,6 +1929,19 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
!cfgBool(doc, TidyMakeBare) ) !cfgBool(doc, TidyMakeBare) )
return; return;
/* Output proprietary attributes to maintain errout compatibility
* with traditional Tidy. This is a result of moving all of the
* proprietary checks to near the end of the cleanup process,
* meaning this result would not ordinarily be displayed.
*/
attval = node->attributes;
while ( attval ) {
next_attr = attval->next;
if ( strcmp(attval->attribute, "xmlns") != 0 )
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
attval = next_attr;
}
TY_(FreeAttrs)( doc, node ); TY_(FreeAttrs)( doc, node );
} }
@ -2001,6 +2015,12 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
/* discards <o:p> which encodes the paragraph mark */ /* discards <o:p> which encodes the paragraph mark */
if ( node->tag && TY_(tmbstrcmp)(node->tag->name,"o:p")==0) if ( node->tag && TY_(tmbstrcmp)(node->tag->name,"o:p")==0)
{ {
/* Output proprietary elements to maintain errout compatibility
* with traditional Tidy. This is a result of moving all of the
* proprietary checks to near the end of the cleanup process,
* meaning this result would not ordinarily be displayed.
*/
TY_(ReportError)(doc, NULL, node, PROPRIETARY_ELEMENT);
Node* next; Node* next;
DiscardContainer( doc, node, &next ); DiscardContainer( doc, node, &next );
node = next; node = next;

View file

@ -322,7 +322,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyAnchorAsName, MU, "anchor-as-name", BL, yes, ParseBool, boolPicks }, { TidyAnchorAsName, MU, "anchor-as-name", BL, yes, ParseBool, boolPicks },
{ TidyPPrintTabs, PP, "indent-with-tabs", BL, no, ParseTabs, boolPicks }, /* 20150515 - Issue #108 */ { TidyPPrintTabs, PP, "indent-with-tabs", BL, no, ParseTabs, boolPicks }, /* 20150515 - Issue #108 */
{ TidySkipNested, MU, "skip-nested", BL, yes, ParseBool, boolPicks }, /* 1642186 - Issue #65 */ { TidySkipNested, MU, "skip-nested", BL, yes, ParseBool, boolPicks }, /* 1642186 - Issue #65 */
{ TidyStrictTagsAttr, MU, "strict-tags-attributes", BL, yes, ParseBool, boolPicks }, /* 20160209 - Issue #350 */ { TidyStrictTagsAttr, MU, "strict-tags-attributes", BL, no, ParseBool, boolPicks }, /* 20160209 - Issue #350 */
{ N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL } { N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL }
}; };

View file

@ -262,6 +262,8 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "CANT_BE_NESTED", CANT_BE_NESTED }, { "CANT_BE_NESTED", CANT_BE_NESTED },
{ "OBSOLETE_ELEMENT", OBSOLETE_ELEMENT }, { "OBSOLETE_ELEMENT", OBSOLETE_ELEMENT },
{ "PROPRIETARY_ELEMENT", PROPRIETARY_ELEMENT }, { "PROPRIETARY_ELEMENT", PROPRIETARY_ELEMENT },
{ "ELEMENT_VERS_MISMATCH_ERROR", ELEMENT_VERS_MISMATCH_ERROR },
{ "ELEMENT_VERS_MISMATCH_WARN", ELEMENT_VERS_MISMATCH_WARN },
{ "UNKNOWN_ELEMENT", UNKNOWN_ELEMENT }, { "UNKNOWN_ELEMENT", UNKNOWN_ELEMENT },
{ "TRIM_EMPTY_ELEMENT", TRIM_EMPTY_ELEMENT }, { "TRIM_EMPTY_ELEMENT", TRIM_EMPTY_ELEMENT },
{ "COERCE_TO_ENDTAG", COERCE_TO_ENDTAG }, { "COERCE_TO_ENDTAG", COERCE_TO_ENDTAG },
@ -298,6 +300,8 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "BAD_ATTRIBUTE_VALUE", BAD_ATTRIBUTE_VALUE }, { "BAD_ATTRIBUTE_VALUE", BAD_ATTRIBUTE_VALUE },
{ "UNEXPECTED_GT", UNEXPECTED_GT }, { "UNEXPECTED_GT", UNEXPECTED_GT },
{ "PROPRIETARY_ATTRIBUTE", PROPRIETARY_ATTRIBUTE }, { "PROPRIETARY_ATTRIBUTE", PROPRIETARY_ATTRIBUTE },
{ "MISMATCHED_ATTRIBUTE_ERROR", MISMATCHED_ATTRIBUTE_ERROR },
{ "MISMATCHED_ATTRIBUTE_WARN", MISMATCHED_ATTRIBUTE_WARN },
{ "PROPRIETARY_ATTR_VALUE", PROPRIETARY_ATTR_VALUE }, { "PROPRIETARY_ATTR_VALUE", PROPRIETARY_ATTR_VALUE },
{ "REPEATED_ATTRIBUTE", REPEATED_ATTRIBUTE }, { "REPEATED_ATTRIBUTE", REPEATED_ATTRIBUTE },
{ "MISSING_IMAGEMAP", MISSING_IMAGEMAP }, { "MISSING_IMAGEMAP", MISSING_IMAGEMAP },
@ -322,8 +326,6 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "MISSING_ATTRIBUTE", MISSING_ATTRIBUTE }, { "MISSING_ATTRIBUTE", MISSING_ATTRIBUTE },
{ "WHITE_IN_URI", WHITE_IN_URI }, { "WHITE_IN_URI", WHITE_IN_URI },
{ "REMOVED_HTML5", REMOVED_HTML5 }, { "REMOVED_HTML5", REMOVED_HTML5 },
{ "BAD_BODY_HTML5", BAD_BODY_HTML5 },
{ "BAD_ALIGN_HTML5", BAD_ALIGN_HTML5 },
{ "BAD_SUMMARY_HTML5", BAD_SUMMARY_HTML5 }, { "BAD_SUMMARY_HTML5", BAD_SUMMARY_HTML5 },
{ "PREVIOUS_LOCATION", PREVIOUS_LOCATION }, { "PREVIOUS_LOCATION", PREVIOUS_LOCATION },
{ "VENDOR_SPECIFIC_CHARS", VENDOR_SPECIFIC_CHARS }, { "VENDOR_SPECIFIC_CHARS", VENDOR_SPECIFIC_CHARS },

View file

@ -350,6 +350,8 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ MISSING_ATTR_VALUE, 0, "%s attribute \"%s\" lacks value" }, /* Warning in CheckUrl, Error otherwise */ { MISSING_ATTR_VALUE, 0, "%s attribute \"%s\" lacks value" }, /* Warning in CheckUrl, Error otherwise */
{ UNKNOWN_ATTRIBUTE, 0, "%s unknown attribute \"%s\"" }, /* Error */ { UNKNOWN_ATTRIBUTE, 0, "%s unknown attribute \"%s\"" }, /* Error */
{ PROPRIETARY_ATTRIBUTE, 0, "%s proprietary attribute \"%s\"" }, /* Error */ { PROPRIETARY_ATTRIBUTE, 0, "%s proprietary attribute \"%s\"" }, /* Error */
{ MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" }, /* Error */
{ MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" }, /* Warning */
{ JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" }, /* Error */ { JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" }, /* Error */
{ XML_ATTRIBUTE_VALUE, 0, "%s has XML attribute \"%s\"" }, /* Error (but deprecated) */ { XML_ATTRIBUTE_VALUE, 0, "%s has XML attribute \"%s\"" }, /* Error (but deprecated) */
@ -392,8 +394,6 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ OBSOLETE_ELEMENT, 0, "replacing obsolete element %s with %s" }, /* Warning */ { OBSOLETE_ELEMENT, 0, "replacing obsolete element %s with %s" }, /* Warning */
{ COERCE_TO_ENDTAG_WARN, 0, "<%s> is probably intended as </%s>" }, /* Warning */ { COERCE_TO_ENDTAG_WARN, 0, "<%s> is probably intended as </%s>" }, /* Warning */
{ REMOVED_HTML5, 0, "%s element removed from HTML5" }, /* Warning */ { REMOVED_HTML5, 0, "%s element removed from HTML5" }, /* Warning */
{ BAD_BODY_HTML5, 0, "Found attribute on body that is obsolete in HTML5. Use CSS" }, /* Warning */
{ BAD_ALIGN_HTML5, 0, "The align attribute on the %s element is obsolete. Use CSS" }, /* Warning */
{ BAD_SUMMARY_HTML5, 0, "The summary attribute on the %s element is obsolete in HTML5" }, /* Warning */ { BAD_SUMMARY_HTML5, 0, "The summary attribute on the %s element is obsolete in HTML5" }, /* Warning */
/* ReportNotice */ /* ReportNotice */
@ -415,6 +415,8 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ INSERTING_TAG, 0, "inserting implicit <%s>" }, /* Error */ { INSERTING_TAG, 0, "inserting implicit <%s>" }, /* Error */
{ CANT_BE_NESTED, 0, "%s can't be nested" }, /* Error */ { CANT_BE_NESTED, 0, "%s can't be nested" }, /* Error */
{ PROPRIETARY_ELEMENT, 0, "%s is not approved by W3C" }, /* Error */ { PROPRIETARY_ELEMENT, 0, "%s is not approved by W3C" }, /* Error */
{ ELEMENT_VERS_MISMATCH_ERROR, 0, "%s element not available in %s" }, /* Error */
{ ELEMENT_VERS_MISMATCH_WARN, 0, "%s element not available in %s" }, /* Warning */
{ ILLEGAL_NESTING, 0, "%s shouldn't be nested" }, /* Error */ { ILLEGAL_NESTING, 0, "%s shouldn't be nested" }, /* Error */
{ NOFRAMES_CONTENT, 0, "%s not inside 'noframes' element" }, /* Error */ { NOFRAMES_CONTENT, 0, "%s not inside 'noframes' element" }, /* Error */
{ UNEXPECTED_END_OF_FILE, 0, "unexpected end of file %s" }, /* Error */ { UNEXPECTED_END_OF_FILE, 0, "unexpected end of file %s" }, /* Error */
@ -745,7 +747,9 @@ static languageDefinition language_en = { whichPluralForm_en, {
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */ - The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyDropPropAttrs, 0, TidyDropPropAttrs, 0,
"This option specifies if Tidy should strip out proprietary attributes, " "This option specifies if Tidy should strip out proprietary attributes, "
"such as Microsoft data binding attributes. " "such as Microsoft data binding attributes. Additionally attributes "
"that aren't permitted in the output version of HTML will be dropped "
"if used with <code>strict-tags-attributes</code>. "
}, },
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>. {/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
It's very important that <br/> be self-closing in this manner! It's very important that <br/> be self-closing in this manner!
@ -1574,6 +1578,21 @@ static languageDefinition language_en = { whichPluralForm_en, {
"This option specifies that Tidy should skip nested tags when parsing " "This option specifies that Tidy should skip nested tags when parsing "
"script and style data. " "script and style data. "
}, },
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
    It's very important that <br/> be self-closing in this manner!
    - The strings "Tidy" and "HTML Tidy" are the program name and must not be translated.
    - Do not state a default value in this text; the option table is the
      authority for the default of strict-tags-attributes. */
    TidyStrictTagsAttr, 0,
    "This option ensures that tags and attributes are applicable for the "
    "version of HTML that Tidy outputs. When set to <code>yes</code> "
    "and the output document type is a strict doctype, then Tidy "
    "will report errors. If the output document type is a loose or "
    "transitional doctype, then Tidy will report warnings. "
    "<br/>"
    "Additionally if <code>drop-proprietary-attributes</code> is enabled, "
    "then attributes that are not applicable will be dropped, too. "
    "<br/>"
    "When set to <code>no</code>, these checks are not performed. "
},
/******************************************************** /********************************************************
** Console Application ** Console Application

View file

@ -2753,26 +2753,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
} }
else if ( !cfgBool(doc, TidyXmlTags) ) else if ( !cfgBool(doc, TidyXmlTags) )
{ {
Node* curr = lexer->token; TY_(ConstrainVersion)( doc, lexer->token->tag->versions );
TY_(ConstrainVersion)( doc, curr->tag->versions ); TY_(RepairDuplicateAttributes)( doc, lexer->token, no );
if ( curr->tag->versions & VERS_PROPRIETARY )
{
if ( !cfgBool(doc, TidyMakeClean) ||
( !nodeIsNOBR(curr) && !nodeIsWBR(curr) ) )
{
TY_(ReportError)(doc, NULL, curr, PROPRIETARY_ELEMENT );
if ( nodeIsLAYER(curr) )
doc->badLayout |= USING_LAYER;
else if ( nodeIsSPACER(curr) )
doc->badLayout |= USING_SPACER;
else if ( nodeIsNOBR(curr) )
doc->badLayout |= USING_NOBR;
}
}
TY_(RepairDuplicateAttributes)( doc, curr, no );
} else } else
TY_(RepairDuplicateAttributes)( doc, lexer->token, yes ); TY_(RepairDuplicateAttributes)( doc, lexer->token, yes );
#ifdef TIDY_STORE_ORIGINAL_TEXT #ifdef TIDY_STORE_ORIGINAL_TEXT

View file

@ -525,6 +525,8 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
char const *name = "NULL", *value = "NULL"; char const *name = "NULL", *value = "NULL";
char tagdesc[64]; char tagdesc[64];
ctmbstr fmt = tidyLocalizedString(code); ctmbstr fmt = tidyLocalizedString(code);
uint version;
ctmbstr extra_string;
assert( fmt != NULL ); assert( fmt != NULL );
@ -549,6 +551,22 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name); messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name);
break; break;
case MISMATCHED_ATTRIBUTE_WARN:
version = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
if (!extra_string)
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name, extra_string);
break;
case MISMATCHED_ATTRIBUTE_ERROR:
version = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
if (!extra_string)
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
messageNode(doc, TidyError, code, node, fmt, tagdesc, name, extra_string);
break;
case BAD_ATTRIBUTE_VALUE: case BAD_ATTRIBUTE_VALUE:
case BAD_ATTRIBUTE_VALUE_REPLACED: case BAD_ATTRIBUTE_VALUE_REPLACED:
case INVALID_ATTRIBUTE: case INVALID_ATTRIBUTE:
@ -665,8 +683,6 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
case NESTED_EMPHASIS: case NESTED_EMPHASIS:
case REMOVED_HTML5: case REMOVED_HTML5:
case BAD_BODY_HTML5:
case BAD_ALIGN_HTML5:
case BAD_SUMMARY_HTML5: case BAD_SUMMARY_HTML5:
messageNode(doc, TidyWarning, code, rpt, fmt, nodedesc); messageNode(doc, TidyWarning, code, rpt, fmt, nodedesc);
break; break;
@ -707,6 +723,8 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
char elemdesc[ 256 ] = {0}; char elemdesc[ 256 ] = {0};
Node* rpt = ( element ? element : node ); Node* rpt = ( element ? element : node );
ctmbstr fmt = tidyLocalizedString(code); ctmbstr fmt = tidyLocalizedString(code);
uint versionEmitted, declared, version;
ctmbstr extra_string = NULL;
assert( fmt != NULL ); assert( fmt != NULL );
@ -729,6 +747,26 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
messageNode(doc, TidyWarning, code, node, fmt, nodedesc); messageNode(doc, TidyWarning, code, node, fmt, nodedesc);
break; break;
case ELEMENT_VERS_MISMATCH_WARN:
versionEmitted = doc->lexer->versionEmitted;
declared = doc->lexer->doctype;
version = versionEmitted == 0 ? declared : versionEmitted;
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
if (!extra_string)
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
messageNode(doc, TidyWarning, code, node, fmt, nodedesc, extra_string);
break;
case ELEMENT_VERS_MISMATCH_ERROR:
versionEmitted = doc->lexer->versionEmitted;
declared = doc->lexer->doctype;
version = versionEmitted == 0 ? declared : versionEmitted;
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
if (!extra_string)
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
messageNode(doc, TidyError, code, node, fmt, nodedesc, extra_string);
break;
case MISSING_TITLE_ELEMENT: case MISSING_TITLE_ELEMENT:
case INCONSISTENT_VERSION: case INCONSISTENT_VERSION:
case MALFORMED_DOCTYPE: case MALFORMED_DOCTYPE:

View file

@ -98,6 +98,8 @@ typedef enum {
CANT_BE_NESTED, CANT_BE_NESTED,
OBSOLETE_ELEMENT, OBSOLETE_ELEMENT,
PROPRIETARY_ELEMENT, PROPRIETARY_ELEMENT,
ELEMENT_VERS_MISMATCH_ERROR,
ELEMENT_VERS_MISMATCH_WARN,
UNKNOWN_ELEMENT, UNKNOWN_ELEMENT,
TRIM_EMPTY_ELEMENT, TRIM_EMPTY_ELEMENT,
COERCE_TO_ENDTAG, COERCE_TO_ENDTAG,
@ -137,6 +139,8 @@ typedef enum {
BAD_ATTRIBUTE_VALUE, BAD_ATTRIBUTE_VALUE,
UNEXPECTED_GT, UNEXPECTED_GT,
PROPRIETARY_ATTRIBUTE, PROPRIETARY_ATTRIBUTE,
MISMATCHED_ATTRIBUTE_ERROR,
MISMATCHED_ATTRIBUTE_WARN,
PROPRIETARY_ATTR_VALUE, PROPRIETARY_ATTR_VALUE,
REPEATED_ATTRIBUTE, REPEATED_ATTRIBUTE,
MISSING_IMAGEMAP, MISSING_IMAGEMAP,
@ -168,8 +172,6 @@ typedef enum {
WHITE_IN_URI, WHITE_IN_URI,
REMOVED_HTML5, /* this element removed from HTML5 */ REMOVED_HTML5, /* this element removed from HTML5 */
BAD_BODY_HTML5, /* attr on body removed from HTML5 */
BAD_ALIGN_HTML5, /* use of align attr removed from HTML5 */
BAD_SUMMARY_HTML5, /* use of summary attr removed from HTML5 */ BAD_SUMMARY_HTML5, /* use of summary attr removed from HTML5 */
PREVIOUS_LOCATION, /* last */ PREVIOUS_LOCATION, /* last */

View file

@ -1300,9 +1300,9 @@ void tidyDocReportDoctype( TidyDocImpl* doc )
} }
/* ###################################################################################### /*****************************************************************************
HTML5 STUFF * HTML5 STUFF
*/ *****************************************************************************/
#if !defined(NDEBUG) && defined(_MSC_VER) #if !defined(NDEBUG) && defined(_MSC_VER)
extern void show_not_html5(void); extern void show_not_html5(void);
/* ----------------------------- /* -----------------------------
@ -1358,19 +1358,19 @@ Bool inRemovedInfo( uint tid )
return no; return no;
} }
static Bool BadBody5( Node* node ) /* Things that should not be in an HTML5 body. This is special for CheckHTML5(),
{ and we might just want to remove CheckHTML5()'s output altogether and count
if (TY_(AttrGetById)(node, TidyAttr_BACKGROUND) || on the default --strict-tags-attributes.
TY_(AttrGetById)(node, TidyAttr_BGCOLOR) || */
TY_(AttrGetById)(node, TidyAttr_TEXT) || static BadBody5Attribs[] = {
TY_(AttrGetById)(node, TidyAttr_LINK) || TidyAttr_BACKGROUND,
TY_(AttrGetById)(node, TidyAttr_VLINK) || TidyAttr_BGCOLOR,
TY_(AttrGetById)(node, TidyAttr_ALINK)) TidyAttr_TEXT,
{ TidyAttr_LINK,
return yes; TidyAttr_VLINK,
} TidyAttr_ALINK,
return no; TidyAttr_UNKNOWN /* Must be last! */
} };
static Bool nodeHasAlignAttr( Node *node ) static Bool nodeHasAlignAttr( Node *node )
{ {
@ -1383,73 +1383,94 @@ static Bool nodeHasAlignAttr( Node *node )
return no; return no;
} }
/* see http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete */ /*
* Perform special checks for HTML5, even when `--strict-tags-attributes`
* is not set to `yes` (its default is `no`). This will ensure that HTML5 warning
* and error output is given regardless of the new option, and ensure that
* cleanup takes place. This provides mostly consistent Tidy behavior even with
* the introduction of this new option. Note that strings have changed, though,
* in order to maintain consistency with the `--strict-tags-attributes`
* messages.
*
* See also: http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete
*/
void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node ) void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
{ {
/* Lexer* lexer = doc->lexer; */
Bool clean = cfgBool( doc, TidyMakeClean ); Bool clean = cfgBool( doc, TidyMakeClean );
Bool already_strict = cfgBool( doc, TidyStrictTagsAttr );
Node* body = TY_(FindBody)( doc ); Node* body = TY_(FindBody)( doc );
Bool warn = yes; /* should this be a warning, error, or report??? */ Bool warn = yes; /* should this be a warning, error, or report??? */
AttVal* attr = NULL;
int i = 0;
#if !defined(NDEBUG) && defined(_MSC_VER) #if !defined(NDEBUG) && defined(_MSC_VER)
// list_not_html5(); // list_not_html5();
#endif #endif
while (node) while (node)
{ {
if ( nodeHasAlignAttr( node ) ) { if ( nodeHasAlignAttr( node ) ) {
/*\ /* @todo: Is this for ALL elements that accept an 'align' attribute,
* Is this for ALL elements that accept an 'align' attribute, or should * or should this be a sub-set test?
* this be a sub-set test */
\*/
TY_(ReportWarning)(doc, node, node, BAD_ALIGN_HTML5); /* We will only emit this message if `--strict-tags-attributes==no`;
* otherwise if yes this message will be output during later
* checking.
*/
if ( !already_strict )
TY_(ReportAttrError)(doc, node, TY_(AttrGetById)(node, TidyAttr_ALIGN), MISMATCHED_ATTRIBUTE_WARN);
} }
if ( node == body ) { if ( node == body ) {
if ( BadBody5(body) ) { i = 0;
/* perhaps need a new/different warning for this, like /* We will only emit these messages if `--strict-tags-attributes==no`;
* The background 'attribute" on the body element is obsolete. Use CSS instead. * otherwise if yes these messages will be output during later
* but how to pass an attribute name to be embedded in the message. * checking.
\*/ */
TY_(ReportWarning)(doc, node, body, BAD_BODY_HTML5); if ( !already_strict ) {
while ( BadBody5Attribs[i] != TidyAttr_UNKNOWN ) {
attr = TY_(AttrGetById)(node, BadBody5Attribs[i]);
if ( attr )
TY_(ReportAttrError)(doc, node, attr , MISMATCHED_ATTRIBUTE_WARN);
i++;
}
} }
} else } else
if ( nodeIsACRONYM(node) ) { if ( nodeIsACRONYM(node) ) {
if (clean) { if (clean) {
/* replace with 'abbr' with warning to that effect /* Replace with 'abbr' with warning to that effect.
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid ) * Maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*/ */
TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no); TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no);
} else { } else {
/* sadly, this stops writing of the tidied document, unless 'forced' if ( !already_strict )
TY_(ReportError)(doc, node, node, REMOVED_HTML5); TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
so go back to a 'warning' for now...
*/
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} }
} else } else
if ( nodeIsAPPLET(node) ) { if ( nodeIsAPPLET(node) ) {
if (clean) { if (clean) {
/* replace with 'object' with warning to that effect /* replace with 'object' with warning to that effect
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid ) * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*/ */
TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no); TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no);
} else { } else {
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} }
} else } else
if ( nodeIsBASEFONT(node) ) { if ( nodeIsBASEFONT(node) ) {
/*\ /* basefont: CSS equivalent 'font-size', 'font-family' and 'color'
* basefont: CSS equivalen 'font-size', 'font-family' and 'color' on body or class on each subsequent element * on body or class on each subsequent element.
* Difficult - If it is the first body element, then could consider adding that * Difficult - If it is the first body element, then could consider
* to the <body> as a whole, else could perhaps apply it to all subsequent element. * adding that to the <body> as a whole, else could perhaps apply it
* But also in consideration is the fact that it was NOT supported in many browsers * to all subsequent elements. But also in consideration is the fact
* For now just report a warning * that it was NOT supported in many browsers.
\*/ * - For now just report a warning
*/
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else } else
if ( nodeIsBIG(node) ) { if ( nodeIsBIG(node) ) {
/*\ /* big: CSS equivalent 'font-size:larger'
* big: CSS equivalent 'font-size:larger' * so could replace the <big> ... </big> with
* so could replace the <big> ... </big> with
* <span style="font-size: larger"> ... </span> * <span style="font-size: larger"> ... </span>
* then replace <big> with <span> * then replace <big> with <span>
* Need to think about that... * Need to think about that...
@ -1461,94 +1482,185 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
* Also maybe need a specific message like * Also maybe need a specific message like
* Element '%s' replaced with 'span' with a 'font-size: larger style attribute * Element '%s' replaced with 'span' with a 'font-size: larger style attribute
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid ) * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
* */
\*/
if (clean) { if (clean) {
TY_(AddStyleProperty)( doc, node, "font-size: larger" ); TY_(AddStyleProperty)( doc, node, "font-size: larger" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no); TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else { } else {
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} }
} else } else
if ( nodeIsCENTER(node) ) { if ( nodeIsCENTER(node) ) {
/*\ /* center: CSS equivalent 'text-align:center'
* center: CSS equivalent 'text-align:center' * and 'margin-left:auto; margin-right:auto' on descendant blocks
* and 'margin-left:auto; margin-right:auto' on descendant blocks * Tidy already handles this if 'clean' by SILENTLY generating the
* Tidy already handles this if 'clean' by SILENTLY generating the <style> * <style> and adding a <div class="c1"> around the elements.
* and adding a <div class="c1"> around the elements.
* see: static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode) * see: static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
\*/ */
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else } else
if ( nodeIsDIR(node) ) { if ( nodeIsDIR(node) ) {
/*\ /* dir: replace by <ul>
* dir: replace by <ul> * Tidy already actions this and issues a warning
* Tidy already actions this and issues a warning * Should this be CHANGED???
* Should this be CHANGED??? */
\*/ if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else } else
if ( nodeIsFONT(node) ) { if ( nodeIsFONT(node) ) {
/*\ /* Tidy already handles this -
* Tidy already handles this - * If 'clean' replaced by CSS, else
* If 'clean' replaced by CSS, else
* if is NOT clean, and doctype html5 then warnings issued * if is NOT clean, and doctype html5 then warnings issued
* done in Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) (I think?) * done in Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) (I think?)
* */
\*/ if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else } else
if (( nodesIsFRAME(node) ) || ( nodeIsFRAMESET(node) ) || ( nodeIsNOFRAMES(node) )) { if (( nodesIsFRAME(node) ) || ( nodeIsFRAMESET(node) ) || ( nodeIsNOFRAMES(node) )) {
/*\ /* YOW: What to do here?????? Maybe <iframe>????
* YOW: What to do here?????? Maybe <iframe>???? */
\*/ if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else } else
if ( nodeIsSTRIKE(node) ) { if ( nodeIsSTRIKE(node) ) {
/*\ /* strike: CSS equivalent 'text-decoration:line-through'
* strike: CSS equivalent 'text-decoration:line-through'
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid ) * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
\*/ */
if (clean) { if (clean) {
TY_(AddStyleProperty)( doc, node, "text-decoration: line-through" ); TY_(AddStyleProperty)( doc, node, "text-decoration: line-through" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no); TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else { } else {
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} }
} else } else
if ( nodeIsTT(node) ) { if ( nodeIsTT(node) ) {
/*\ /* tt: CSS equivalent 'font-family:monospace'
* tt: CSS equivalent 'font-family:monospace'
* Tidy presently does nothing. Tidy5 issues a warning * Tidy presently does nothing. Tidy5 issues a warning
* But like the 'clean' <font> replacement this could also be replaced with CSS * But like the 'clean' <font> replacement this could also be replaced with CSS
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid ) * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
* */
\*/
if (clean) { if (clean) {
TY_(AddStyleProperty)( doc, node, "font-family: monospace" ); TY_(AddStyleProperty)( doc, node, "font-family: monospace" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no); TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else { } else {
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} }
} else } else
if (TY_(nodeIsElement)(node)) { if (TY_(nodeIsElement)(node)) {
if (node->tag) { if (node->tag) {
if ((!(node->tag->versions & VERS_HTML5))||(inRemovedInfo(node->tag->id))) { if ( (!(node->tag->versions & VERS_HTML5) && !(node->tag->versions & VERS_PROPRIETARY)) || (inRemovedInfo(node->tag->id)) ) {
/* issue warning for elements like 'markquee' */ if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5); TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
}
}
if (node->content)
TY_(CheckHTML5)( doc, node->content );
node = node->next;
}
}
/*****************************************************************************
* END HTML5 STUFF
*****************************************************************************/
/*
* Check and report HTML tags and attributes that are:
* - Proprietary, and/or
* - Not supported in the current version of HTML, defined as the version
* of HTML that we are emitting.
* Proprietary items are reported as WARNINGS, and version mismatches will
* be reported as WARNING or ERROR in the following conditions:
* - ERROR if the emitted doctype is a strict doctype.
* - WARNING if the emitted doctype is a non-strict doctype.
* The proprietary checks are *always* run as they have always been an integral
* part of Tidy. The version checks are controlled by `strict-tags-attributes`.
*/
void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node )
{
uint versionEmitted = doc->lexer->versionEmitted;
uint declared = doc->lexer->doctype;
uint version = versionEmitted == 0 ? declared : versionEmitted;
int tagReportType = VERS_STRICT & version ? ELEMENT_VERS_MISMATCH_ERROR : ELEMENT_VERS_MISMATCH_WARN;
int attrReportType = VERS_STRICT & version ? MISMATCHED_ATTRIBUTE_ERROR : MISMATCHED_ATTRIBUTE_WARN;
Bool check_versions = cfgBool( doc, TidyStrictTagsAttr );
AttVal *next_attr, *attval;
Bool attrIsProprietary = no;
Bool attrIsMismatched = yes;
while (node)
{
/* This bit here handles our HTML tags */
if ( TY_(nodeIsElement)(node) && node->tag ) {
/* Leave XML stuff alone. */
if ( !cfgBool(doc, TidyXmlTags) )
{
/* Version mismatches take priority. */
if ( check_versions && !(node->tag->versions & version) )
{
TY_(ReportError)(doc, NULL, node, tagReportType );
}
/* If it's not mismatched, it could still be proprietary. */
else if ( node->tag->versions & VERS_PROPRIETARY )
{
if ( !cfgBool(doc, TidyMakeClean) ||
( !nodeIsNOBR(node) && !nodeIsWBR(node) ) )
{
TY_(ReportError)(doc, NULL, node, PROPRIETARY_ELEMENT );
if ( nodeIsLAYER(node) )
doc->badLayout |= USING_LAYER;
else if ( nodeIsSPACER(node) )
doc->badLayout |= USING_SPACER;
else if ( nodeIsNOBR(node) )
doc->badLayout |= USING_NOBR;
}
} }
} }
} }
if (node->content) /* And this bit here handles our attributes */
TY_(CheckHTML5)( doc, node->content ); if (TY_(nodeIsElement)(node))
{
attval = node->attributes;
while (attval)
{
next_attr = attval->next;
attrIsProprietary = TY_(AttributeIsProprietary)(node, attval);
attrIsMismatched = check_versions ? TY_(AttributeIsMismatched)(node, attval, doc) : no;
/* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */
if ( attrIsProprietary )
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
else if ( attrIsMismatched )
{
TY_(ReportAttrError)(doc, node, attval, attrReportType);
}
/* @todo: do we need a new option to drop mismatches? Or should we
simply drop them? */
if ( ( attrIsProprietary || attrIsMismatched ) && cfgBool(doc, TidyDropPropAttrs) )
TY_(RemoveAttribute)( doc, node, attval );
attval = next_attr;
}
}
if (node->content)
TY_(CheckHTMLTagsAttribsVersions)( doc, node->content );
node = node->next; node = node->next;
} }
} }
/* END HTML5 STUFF
######################################################################################
*/
#if !defined(NDEBUG) && defined(_MSC_VER) #if !defined(NDEBUG) && defined(_MSC_VER)
/* *** FOR DEBUG ONLY *** */ /* *** FOR DEBUG ONLY *** */
@ -1686,7 +1798,6 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags ); Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName ); Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis ); Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
ctmbstr sdef = NULL;
Node* node; Node* node;
#if !defined(NDEBUG) && defined(_MSC_VER) #if !defined(NDEBUG) && defined(_MSC_VER)
@ -1747,12 +1858,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
/* remember given doctype for reporting */ /* remember given doctype for reporting */
node = TY_(FindDocType)(doc); node = TY_(FindDocType)(doc);
sdef = tidyOptGetValue((TidyDoc)doc, TidyDoctype );
if (!sdef)
sdef = tidyOptGetCurrPick((TidyDoc) doc, TidyDoctypeMode );
if (sdef && (strcmp(sdef,"html5") == 0)) {
TY_(CheckHTML5)( doc, &doc->root );
}
if (node) if (node)
{ {
AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC"); AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
@ -1798,6 +1904,14 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if ( xmlOut && xmlDecl ) if ( xmlOut && xmlDecl )
TY_(FixXmlDecl)( doc ); TY_(FixXmlDecl)( doc );
/* At this point the apparent doctype is going to be as stable as
it can ever be, so we can start detecting things that shouldn't
be in this version of HTML
*/
if (doc->lexer->versionEmitted & VERS_HTML5)
TY_(CheckHTML5)( doc, &doc->root );
TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root );
#if !defined(NDEBUG) && defined(_MSC_VER) #if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("All nodes AFTER clean and repair\n"); SPRTF("All nodes AFTER clean and repair\n");
dbg_show_all_nodes( doc, &doc->root, 0 ); dbg_show_all_nodes( doc, &doc->root, 0 );

View file

@ -1,2 +1,3 @@
5.1.38 5.1.39
2016.02.16 2016.02.16