diff --git a/src/attrs.c b/src/attrs.c
index 983caee..f678245 100644
--- a/src/attrs.c
+++ b/src/attrs.c
@@ -479,11 +479,12 @@ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id )
return VERS_UNKNOWN;
}
-/* returns true if the element is a W3C defined element */
-/* but the element/attribute combination is not. We're */
-/* only defining as "proprietary" items that are not in */
-/* the element's AttrVersion structure. */
-static Bool AttributeIsProprietary(Node* node, AttVal* attval)
+/* returns true if the element is a W3C defined element
+ * but the element/attribute combination is not. We're
+ * only defining as "proprietary" items that are not in
+ * the element's AttrVersion structure.
+ */
+Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval)
{
if (!node || !attval)
return no;
@@ -500,6 +501,34 @@ static Bool AttributeIsProprietary(Node* node, AttVal* attval)
return yes;
}
+/* returns true if the element is a W3C defined element
+ * but the element/attribute combination is not. We're
+ * considering it a mismatch if the document version
+ * does not allow the attribute as called out in its
+ * AttrVersion structure.
+ */
+Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc)
+{
+ uint doctype;
+
+ if (!node || !attval)
+ return no;
+
+ if (!node->tag)
+ return no;
+
+ if (!(node->tag->versions & VERS_ALL))
+ return no;
+
+ doctype = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
+
+ if (AttributeVersions(node, attval) & doctype)
+ return no;
+
+ return yes;
+}
+
+
/* used by CheckColor() */
struct _colors
{
@@ -1358,14 +1387,6 @@ const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attv
attribute->attrchk( doc, node, attval );
}
- if (AttributeIsProprietary(node, attval))
- {
- TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
-
- if (cfgBool(doc, TidyDropPropAttrs))
- TY_(RemoveAttribute)( doc, node, attval );
- }
-
return attribute;
}
diff --git a/src/attrs.h b/src/attrs.h
index 2a70faf..e5b0fa9 100644
--- a/src/attrs.h
+++ b/src/attrs.h
@@ -147,6 +147,10 @@ AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id );
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
+Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval);
+Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc);
+
+
/* 0 == TidyAttr_UNKNOWN */
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN)
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == atid))
diff --git a/src/language.c b/src/language.c
index 6d60389..1959409 100644
--- a/src/language.c
+++ b/src/language.c
@@ -262,6 +262,8 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "CANT_BE_NESTED", CANT_BE_NESTED },
{ "OBSOLETE_ELEMENT", OBSOLETE_ELEMENT },
{ "PROPRIETARY_ELEMENT", PROPRIETARY_ELEMENT },
+ { "ELEMENT_VERS_MISMATCH_ERROR", ELEMENT_VERS_MISMATCH_ERROR },
+ { "ELEMENT_VERS_MISMATCH_WARN", ELEMENT_VERS_MISMATCH_WARN },
{ "UNKNOWN_ELEMENT", UNKNOWN_ELEMENT },
{ "TRIM_EMPTY_ELEMENT", TRIM_EMPTY_ELEMENT },
{ "COERCE_TO_ENDTAG", COERCE_TO_ENDTAG },
@@ -298,6 +300,8 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "BAD_ATTRIBUTE_VALUE", BAD_ATTRIBUTE_VALUE },
{ "UNEXPECTED_GT", UNEXPECTED_GT },
{ "PROPRIETARY_ATTRIBUTE", PROPRIETARY_ATTRIBUTE },
+ { "MISMATCHED_ATTRIBUTE_ERROR", MISMATCHED_ATTRIBUTE_ERROR },
+ { "MISMATCHED_ATTRIBUTE_WARN", MISMATCHED_ATTRIBUTE_WARN },
{ "PROPRIETARY_ATTR_VALUE", PROPRIETARY_ATTR_VALUE },
{ "REPEATED_ATTRIBUTE", REPEATED_ATTRIBUTE },
{ "MISSING_IMAGEMAP", MISSING_IMAGEMAP },
@@ -322,8 +326,6 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "MISSING_ATTRIBUTE", MISSING_ATTRIBUTE },
{ "WHITE_IN_URI", WHITE_IN_URI },
{ "REMOVED_HTML5", REMOVED_HTML5 },
- { "BAD_BODY_HTML5", BAD_BODY_HTML5 },
- { "BAD_ALIGN_HTML5", BAD_ALIGN_HTML5 },
{ "BAD_SUMMARY_HTML5", BAD_SUMMARY_HTML5 },
{ "PREVIOUS_LOCATION", PREVIOUS_LOCATION },
{ "VENDOR_SPECIFIC_CHARS", VENDOR_SPECIFIC_CHARS },
diff --git a/src/language_en.h b/src/language_en.h
index fbfb1a8..a06d58c 100644
--- a/src/language_en.h
+++ b/src/language_en.h
@@ -350,6 +350,8 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ MISSING_ATTR_VALUE, 0, "%s attribute \"%s\" lacks value" }, /* Warning in CheckUrl, Error otherwise */
{ UNKNOWN_ATTRIBUTE, 0, "%s unknown attribute \"%s\"" }, /* Error */
{ PROPRIETARY_ATTRIBUTE, 0, "%s proprietary attribute \"%s\"" }, /* Error */
+ { MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" }, /* Error */
+ { MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" }, /* Warning */
{ JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" }, /* Error */
{ XML_ATTRIBUTE_VALUE, 0, "%s has XML attribute \"%s\"" }, /* Error (but deprecated) */
@@ -392,8 +394,6 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ OBSOLETE_ELEMENT, 0, "replacing obsolete element %s with %s" }, /* Warning */
{ COERCE_TO_ENDTAG_WARN, 0, "<%s> is probably intended as %s>" }, /* Warning */
{ REMOVED_HTML5, 0, "%s element removed from HTML5" }, /* Warning */
- { BAD_BODY_HTML5, 0, "Found attribute on body that is obsolete in HTML5. Use CSS" }, /* Warning */
- { BAD_ALIGN_HTML5, 0, "The align attribute on the %s element is obsolete. Use CSS" }, /* Warning */
{ BAD_SUMMARY_HTML5, 0, "The summary attribute on the %s element is obsolete in HTML5" }, /* Warning */
/* ReportNotice */
@@ -415,6 +415,8 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ INSERTING_TAG, 0, "inserting implicit <%s>" }, /* Error */
{ CANT_BE_NESTED, 0, "%s can't be nested" }, /* Error */
{ PROPRIETARY_ELEMENT, 0, "%s is not approved by W3C" }, /* Error */
+ { ELEMENT_VERS_MISMATCH_ERROR, 0, "%s element not available in %s" }, /* Error */
+ { ELEMENT_VERS_MISMATCH_WARN, 0, "%s element not available in %s" }, /* Warning */
{ ILLEGAL_NESTING, 0, "%s shouldn't be nested" }, /* Error */
{ NOFRAMES_CONTENT, 0, "%s not inside 'noframes' element" }, /* Error */
{ UNEXPECTED_END_OF_FILE, 0, "unexpected end of file %s" }, /* Error */
@@ -745,7 +747,9 @@ static languageDefinition language_en = { whichPluralForm_en, {
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyDropPropAttrs, 0,
"This option specifies if Tidy should strip out proprietary attributes, "
- "such as Microsoft data binding attributes. "
+ "such as Microsoft data binding attributes. Additionally attributes "
+ "that aren't permitted in the output version of HTML will be dropped "
+ "if used with strict-tags-attributes
. "
},
{/* Please use _only_ , , , and
.
It's very important that
be self-closing in this manner!
@@ -1574,6 +1578,21 @@ static languageDefinition language_en = { whichPluralForm_en, {
"This option specifies that Tidy should skip nested tags when parsing "
"script and style data. "
},
+ {/* Please use _only_ , , , and
.
+ It's very important that
be self-closing in this manner!
+ - The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
+ TidyStrictTagsAttr, 0,
+ "This options ensures that tags and attributes are applicable for the "
+ "version of HTML that Tidy outputs. When set to yes
(the "
+ "default) and the output document type is a strict doctype, then Tidy "
+ "will report errors. If the output document type is a loose or "
+ "transitional doctype, then Tidy will report warnings. "
+ "
"
+ "Additionally if drop-proprietary-attributes
is enabled, "
+ "then not applicable attributes will be dropped, too. "
+ "
"
+ "When set to no
, these checks are not performed. "
+ },
/********************************************************
** Console Application
diff --git a/src/lexer.c b/src/lexer.c
index 24b2343..00193a8 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -2709,26 +2709,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
{
Node* curr = lexer->token;
TY_(ConstrainVersion)( doc, curr->tag->versions );
-
- if ( curr->tag->versions & VERS_PROPRIETARY )
- {
- if ( !cfgBool(doc, TidyMakeClean) ||
- ( !nodeIsNOBR(curr) && !nodeIsWBR(curr) ) )
- {
- TY_(ReportError)(doc, NULL, curr, PROPRIETARY_ELEMENT );
-
- if ( nodeIsLAYER(curr) )
- doc->badLayout |= USING_LAYER;
- else if ( nodeIsSPACER(curr) )
- doc->badLayout |= USING_SPACER;
- else if ( nodeIsNOBR(curr) )
- doc->badLayout |= USING_NOBR;
- }
- }
-
- TY_(RepairDuplicateAttributes)( doc, curr, no );
- } else
TY_(RepairDuplicateAttributes)( doc, lexer->token, yes );
+
#ifdef TIDY_STORE_ORIGINAL_TEXT
StoreOriginalTextInToken(doc, lexer->token, 0);
#endif
diff --git a/src/message.c b/src/message.c
index be82246..2328515 100755
--- a/src/message.c
+++ b/src/message.c
@@ -525,6 +525,8 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
char const *name = "NULL", *value = "NULL";
char tagdesc[64];
ctmbstr fmt = tidyLocalizedString(code);
+ uint version;
+ ctmbstr extra_string;
assert( fmt != NULL );
@@ -549,6 +551,22 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name);
break;
+ case MISMATCHED_ATTRIBUTE_WARN:
+ version = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
+ extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
+ if (!extra_string)
+ extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
+ messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name, extra_string);
+ break;
+
+ case MISMATCHED_ATTRIBUTE_ERROR:
+ version = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
+ extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
+ if (!extra_string)
+ extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
+ messageNode(doc, TidyError, code, node, fmt, tagdesc, name, extra_string);
+ break;
+
case BAD_ATTRIBUTE_VALUE:
case BAD_ATTRIBUTE_VALUE_REPLACED:
case INVALID_ATTRIBUTE:
@@ -665,8 +683,6 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
case NESTED_EMPHASIS:
case REMOVED_HTML5:
- case BAD_BODY_HTML5:
- case BAD_ALIGN_HTML5:
case BAD_SUMMARY_HTML5:
messageNode(doc, TidyWarning, code, rpt, fmt, nodedesc);
break;
@@ -707,6 +723,8 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
char elemdesc[ 256 ] = {0};
Node* rpt = ( element ? element : node );
ctmbstr fmt = tidyLocalizedString(code);
+ uint versionEmitted, declared, version;
+ ctmbstr extra_string = NULL;
assert( fmt != NULL );
@@ -729,6 +747,26 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
messageNode(doc, TidyWarning, code, node, fmt, nodedesc);
break;
+ case ELEMENT_VERS_MISMATCH_WARN:
+ versionEmitted = doc->lexer->versionEmitted;
+ declared = doc->lexer->doctype;
+ version = versionEmitted == 0 ? declared : versionEmitted;
+ extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
+ if (!extra_string)
+ extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
+ messageNode(doc, TidyWarning, code, node, fmt, nodedesc, extra_string);
+ break;
+
+ case ELEMENT_VERS_MISMATCH_ERROR:
+ versionEmitted = doc->lexer->versionEmitted;
+ declared = doc->lexer->doctype;
+ version = versionEmitted == 0 ? declared : versionEmitted;
+ extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
+ if (!extra_string)
+ extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
+ messageNode(doc, TidyError, code, node, fmt, nodedesc, extra_string);
+ break;
+
case MISSING_TITLE_ELEMENT:
case INCONSISTENT_VERSION:
case MALFORMED_DOCTYPE:
diff --git a/src/message.h b/src/message.h
index a519766..7c49499 100644
--- a/src/message.h
+++ b/src/message.h
@@ -98,6 +98,8 @@ typedef enum {
CANT_BE_NESTED,
OBSOLETE_ELEMENT,
PROPRIETARY_ELEMENT,
+ ELEMENT_VERS_MISMATCH_ERROR,
+ ELEMENT_VERS_MISMATCH_WARN,
UNKNOWN_ELEMENT,
TRIM_EMPTY_ELEMENT,
COERCE_TO_ENDTAG,
@@ -137,6 +139,8 @@ typedef enum {
BAD_ATTRIBUTE_VALUE,
UNEXPECTED_GT,
PROPRIETARY_ATTRIBUTE,
+ MISMATCHED_ATTRIBUTE_ERROR,
+ MISMATCHED_ATTRIBUTE_WARN,
PROPRIETARY_ATTR_VALUE,
REPEATED_ATTRIBUTE,
MISSING_IMAGEMAP,
@@ -168,8 +172,6 @@ typedef enum {
WHITE_IN_URI,
REMOVED_HTML5, /* this element removed from HTML5 */
- BAD_BODY_HTML5, /* attr on body removed from HTML5 */
- BAD_ALIGN_HTML5, /* use of align attr removed from HTML5 */
BAD_SUMMARY_HTML5, /* use of summary attr removed from HTML5 */
PREVIOUS_LOCATION, /* last */
diff --git a/src/tidylib.c b/src/tidylib.c
index 7e1e4bf..0dbdf05 100755
--- a/src/tidylib.c
+++ b/src/tidylib.c
@@ -1300,9 +1300,9 @@ void tidyDocReportDoctype( TidyDocImpl* doc )
}
-/* ######################################################################################
- HTML5 STUFF
- */
+/*****************************************************************************
+ * HTML5 STUFF
+ *****************************************************************************/
#if !defined(NDEBUG) && defined(_MSC_VER)
extern void show_not_html5(void);
/* -----------------------------
@@ -1358,19 +1358,19 @@ Bool inRemovedInfo( uint tid )
return no;
}
-static Bool BadBody5( Node* node )
-{
- if (TY_(AttrGetById)(node, TidyAttr_BACKGROUND) ||
- TY_(AttrGetById)(node, TidyAttr_BGCOLOR) ||
- TY_(AttrGetById)(node, TidyAttr_TEXT) ||
- TY_(AttrGetById)(node, TidyAttr_LINK) ||
- TY_(AttrGetById)(node, TidyAttr_VLINK) ||
- TY_(AttrGetById)(node, TidyAttr_ALINK))
- {
- return yes;
- }
- return no;
-}
+/* Things that should not be in an HTML5 body. This is special for CheckHTML5(),
+ and we might just want to remove CheckHTML5()'s output altogether and count
+ on the default --strict-tags-attributes.
+ */
+static BadBody5Attribs[] = {
+ TidyAttr_BACKGROUND,
+ TidyAttr_BGCOLOR,
+ TidyAttr_TEXT,
+ TidyAttr_LINK,
+ TidyAttr_VLINK,
+ TidyAttr_ALINK,
+ TidyAttr_UNKNOWN /* Must be last! */
+};
static Bool nodeHasAlignAttr( Node *node )
{
@@ -1383,73 +1383,94 @@ static Bool nodeHasAlignAttr( Node *node )
return no;
}
-/* see http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete */
-
+/*
+ * Perform special checks for HTML, even when we're not using the default
+ * option `--strict-tags-attributes yes`. This will ensure that HTML5 warning
+ * and error output is given regardless of the new option, and ensure that
+ * cleanup takes place. This provides mostly consistent Tidy behavior even with
+ * the introduction of this new option. Note that strings have changed, though,
+ * in order to maintain consistency with the `--strict-tags-attributes`
+ * messages.
+ *
+ * See also: http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete
+ */
void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
{
- /* Lexer* lexer = doc->lexer; */
Bool clean = cfgBool( doc, TidyMakeClean );
+ Bool already_strict = cfgBool( doc, TidyStrictTagsAttr );
Node* body = TY_(FindBody)( doc );
Bool warn = yes; /* should this be a warning, error, or report??? */
+ AttVal* attr = NULL;
+ int i = 0;
#if !defined(NDEBUG) && defined(_MSC_VER)
-// list_not_html5();
+ // list_not_html5();
#endif
while (node)
{
if ( nodeHasAlignAttr( node ) ) {
- /*\
- * Is this for ALL elements that accept an 'align' attribute, or should
- * this be a sub-set test
- \*/
- TY_(ReportWarning)(doc, node, node, BAD_ALIGN_HTML5);
+ /* @todo: Is this for ALL elements that accept an 'align' attribute,
+ * or should this be a sub-set test?
+ */
+
+ /* We will only emit this message if `--strict-tags-attributes==no`;
+ * otherwise if yes this message will be output during later
+ * checking.
+ */
+ if ( !already_strict )
+ TY_(ReportAttrError)(doc, node, TY_(AttrGetById)(node, TidyAttr_ALIGN), MISMATCHED_ATTRIBUTE_WARN);
}
if ( node == body ) {
- if ( BadBody5(body) ) {
- /* perhaps need a new/different warning for this, like
- * The background 'attribute" on the body element is obsolete. Use CSS instead.
- * but how to pass an attribute name to be embedded in the message.
- \*/
- TY_(ReportWarning)(doc, node, body, BAD_BODY_HTML5);
+ i = 0;
+ /* We will only emit these messages if `--strict-tags-attributes==no`;
+ * otherwise if yes these messages will be output during later
+ * checking.
+ */
+ if ( !already_strict ) {
+ while ( BadBody5Attribs[i] != TidyAttr_UNKNOWN ) {
+ attr = TY_(AttrGetById)(node, BadBody5Attribs[i]);
+ if ( attr )
+ TY_(ReportAttrError)(doc, node, attr , MISMATCHED_ATTRIBUTE_WARN);
+ i++;
+ }
}
} else
if ( nodeIsACRONYM(node) ) {
if (clean) {
- /* replace with 'abbr' with warning to that effect
- * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
+ /* Replace with 'abbr' with warning to that effect.
+ * Maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*/
TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no);
} else {
- /* sadly, this stops writing of the tidied document, unless 'forced'
- TY_(ReportError)(doc, node, node, REMOVED_HTML5);
- so go back to a 'warning' for now...
- */
- TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
+ if ( !already_strict )
+ TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
- } else
+ } else
if ( nodeIsAPPLET(node) ) {
if (clean) {
- /* replace with 'object' with warning to that effect
+ /* replace with 'object' with warning to that effect
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*/
TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no);
} else {
- TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
+ if ( !already_strict )
+ TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if ( nodeIsBASEFONT(node) ) {
- /*\
- * basefont: CSS equivalen 'font-size', 'font-family' and 'color' on body or class on each subsequent element
- * Difficult - If it is the first body element, then could consider adding that
- * to the