Merge pull request #372 from htacg/attrdict_phase2
Attrdict phase2 - enforce strict tags and attributes
This commit is contained in:
commit
468cc02cf3
47
src/attrs.c
47
src/attrs.c
|
@ -479,11 +479,12 @@ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id )
|
|||
return VERS_UNKNOWN;
|
||||
}
|
||||
|
||||
/* returns true if the element is a W3C defined element */
|
||||
/* but the element/attribute combination is not. We're */
|
||||
/* only defining as "proprietary" items that are not in */
|
||||
/* the element's AttrVersion structure. */
|
||||
static Bool AttributeIsProprietary(Node* node, AttVal* attval)
|
||||
/* returns true if the element is a W3C defined element
|
||||
* but the element/attribute combination is not. We're
|
||||
* only defining as "proprietary" items that are not in
|
||||
* the element's AttrVersion structure.
|
||||
*/
|
||||
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval)
|
||||
{
|
||||
if (!node || !attval)
|
||||
return no;
|
||||
|
@ -500,6 +501,34 @@ static Bool AttributeIsProprietary(Node* node, AttVal* attval)
|
|||
return yes;
|
||||
}
|
||||
|
||||
/* returns true if the element is a W3C defined element
|
||||
* but the element/attribute combination is not. We're
|
||||
* considering it a mismatch if the document version
|
||||
* does not allow the attribute as called out in its
|
||||
* AttrVersion structure.
|
||||
*/
|
||||
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc)
|
||||
{
|
||||
uint doctype;
|
||||
|
||||
if (!node || !attval)
|
||||
return no;
|
||||
|
||||
if (!node->tag)
|
||||
return no;
|
||||
|
||||
if (!(node->tag->versions & VERS_ALL))
|
||||
return no;
|
||||
|
||||
doctype = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
|
||||
|
||||
if (AttributeVersions(node, attval) & doctype)
|
||||
return no;
|
||||
|
||||
return yes;
|
||||
}
|
||||
|
||||
|
||||
/* used by CheckColor() */
|
||||
struct _colors
|
||||
{
|
||||
|
@ -1358,14 +1387,6 @@ const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attv
|
|||
attribute->attrchk( doc, node, attval );
|
||||
}
|
||||
|
||||
if (AttributeIsProprietary(node, attval))
|
||||
{
|
||||
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
|
||||
|
||||
if (cfgBool(doc, TidyDropPropAttrs))
|
||||
TY_(RemoveAttribute)( doc, node, attval );
|
||||
}
|
||||
|
||||
return attribute;
|
||||
}
|
||||
|
||||
|
|
|
@ -147,6 +147,10 @@ AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id );
|
|||
|
||||
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
|
||||
|
||||
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval);
|
||||
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc);
|
||||
|
||||
|
||||
/* 0 == TidyAttr_UNKNOWN */
|
||||
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN)
|
||||
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == atid))
|
||||
|
|
20
src/clean.c
20
src/clean.c
|
@ -1917,6 +1917,7 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
|
|||
/* used to a list from a sequence of bulletted p's */
|
||||
Lexer* lexer = doc->lexer;
|
||||
Node* list = NULL;
|
||||
AttVal *next_attr, *attval;
|
||||
|
||||
while ( node )
|
||||
{
|
||||
|
@ -1928,6 +1929,19 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
|
|||
!cfgBool(doc, TidyMakeBare) )
|
||||
return;
|
||||
|
||||
/* Output proprietary attributes to maintain errout compatibility
|
||||
* with traditional Tidy. This is a result of moving all of the
|
||||
* proprietary checks to near the end of the cleanup process,
|
||||
* meaning this result would not ordinarily be displayed.
|
||||
*/
|
||||
attval = node->attributes;
|
||||
while ( attval ) {
|
||||
next_attr = attval->next;
|
||||
if ( strcmp(attval->attribute, "xmlns") != 0 )
|
||||
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
|
||||
attval = next_attr;
|
||||
}
|
||||
|
||||
TY_(FreeAttrs)( doc, node );
|
||||
}
|
||||
|
||||
|
@ -2001,6 +2015,12 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
|
|||
/* discards <o:p> which encodes the paragraph mark */
|
||||
if ( node->tag && TY_(tmbstrcmp)(node->tag->name,"o:p")==0)
|
||||
{
|
||||
/* Output proprietary elements to maintain errout compatibility
|
||||
* with traditional Tidy. This is a result of moving all of the
|
||||
* proprietary checks to near the end of the cleanup process,
|
||||
* meaning this result would not ordinarily be displayed.
|
||||
*/
|
||||
TY_(ReportError)(doc, NULL, node, PROPRIETARY_ELEMENT);
|
||||
Node* next;
|
||||
DiscardContainer( doc, node, &next );
|
||||
node = next;
|
||||
|
|
|
@ -322,7 +322,7 @@ static const TidyOptionImpl option_defs[] =
|
|||
{ TidyAnchorAsName, MU, "anchor-as-name", BL, yes, ParseBool, boolPicks },
|
||||
{ TidyPPrintTabs, PP, "indent-with-tabs", BL, no, ParseTabs, boolPicks }, /* 20150515 - Issue #108 */
|
||||
{ TidySkipNested, MU, "skip-nested", BL, yes, ParseBool, boolPicks }, /* 1642186 - Issue #65 */
|
||||
{ TidyStrictTagsAttr, MU, "strict-tags-attributes", BL, yes, ParseBool, boolPicks }, /* 20160209 - Issue #350 */
|
||||
{ TidyStrictTagsAttr, MU, "strict-tags-attributes", BL, no, ParseBool, boolPicks }, /* 20160209 - Issue #350 */
|
||||
{ N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL }
|
||||
};
|
||||
|
||||
|
|
|
@ -262,6 +262,8 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
|
|||
{ "CANT_BE_NESTED", CANT_BE_NESTED },
|
||||
{ "OBSOLETE_ELEMENT", OBSOLETE_ELEMENT },
|
||||
{ "PROPRIETARY_ELEMENT", PROPRIETARY_ELEMENT },
|
||||
{ "ELEMENT_VERS_MISMATCH_ERROR", ELEMENT_VERS_MISMATCH_ERROR },
|
||||
{ "ELEMENT_VERS_MISMATCH_WARN", ELEMENT_VERS_MISMATCH_WARN },
|
||||
{ "UNKNOWN_ELEMENT", UNKNOWN_ELEMENT },
|
||||
{ "TRIM_EMPTY_ELEMENT", TRIM_EMPTY_ELEMENT },
|
||||
{ "COERCE_TO_ENDTAG", COERCE_TO_ENDTAG },
|
||||
|
@ -298,6 +300,8 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
|
|||
{ "BAD_ATTRIBUTE_VALUE", BAD_ATTRIBUTE_VALUE },
|
||||
{ "UNEXPECTED_GT", UNEXPECTED_GT },
|
||||
{ "PROPRIETARY_ATTRIBUTE", PROPRIETARY_ATTRIBUTE },
|
||||
{ "MISMATCHED_ATTRIBUTE_ERROR", MISMATCHED_ATTRIBUTE_ERROR },
|
||||
{ "MISMATCHED_ATTRIBUTE_WARN", MISMATCHED_ATTRIBUTE_WARN },
|
||||
{ "PROPRIETARY_ATTR_VALUE", PROPRIETARY_ATTR_VALUE },
|
||||
{ "REPEATED_ATTRIBUTE", REPEATED_ATTRIBUTE },
|
||||
{ "MISSING_IMAGEMAP", MISSING_IMAGEMAP },
|
||||
|
@ -322,8 +326,6 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
|
|||
{ "MISSING_ATTRIBUTE", MISSING_ATTRIBUTE },
|
||||
{ "WHITE_IN_URI", WHITE_IN_URI },
|
||||
{ "REMOVED_HTML5", REMOVED_HTML5 },
|
||||
{ "BAD_BODY_HTML5", BAD_BODY_HTML5 },
|
||||
{ "BAD_ALIGN_HTML5", BAD_ALIGN_HTML5 },
|
||||
{ "BAD_SUMMARY_HTML5", BAD_SUMMARY_HTML5 },
|
||||
{ "PREVIOUS_LOCATION", PREVIOUS_LOCATION },
|
||||
{ "VENDOR_SPECIFIC_CHARS", VENDOR_SPECIFIC_CHARS },
|
||||
|
|
|
@ -350,6 +350,8 @@ static languageDefinition language_en = { whichPluralForm_en, {
|
|||
{ MISSING_ATTR_VALUE, 0, "%s attribute \"%s\" lacks value" }, /* Warning in CheckUrl, Error otherwise */
|
||||
{ UNKNOWN_ATTRIBUTE, 0, "%s unknown attribute \"%s\"" }, /* Error */
|
||||
{ PROPRIETARY_ATTRIBUTE, 0, "%s proprietary attribute \"%s\"" }, /* Error */
|
||||
{ MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" }, /* Error */
|
||||
{ MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" }, /* Warning */
|
||||
{ JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" }, /* Error */
|
||||
{ XML_ATTRIBUTE_VALUE, 0, "%s has XML attribute \"%s\"" }, /* Error (but deprecated) */
|
||||
|
||||
|
@ -392,8 +394,6 @@ static languageDefinition language_en = { whichPluralForm_en, {
|
|||
{ OBSOLETE_ELEMENT, 0, "replacing obsolete element %s with %s" }, /* Warning */
|
||||
{ COERCE_TO_ENDTAG_WARN, 0, "<%s> is probably intended as </%s>" }, /* Warning */
|
||||
{ REMOVED_HTML5, 0, "%s element removed from HTML5" }, /* Warning */
|
||||
{ BAD_BODY_HTML5, 0, "Found attribute on body that is obsolete in HTML5. Use CSS" }, /* Warning */
|
||||
{ BAD_ALIGN_HTML5, 0, "The align attribute on the %s element is obsolete. Use CSS" }, /* Warning */
|
||||
{ BAD_SUMMARY_HTML5, 0, "The summary attribute on the %s element is obsolete in HTML5" }, /* Warning */
|
||||
|
||||
/* ReportNotice */
|
||||
|
@ -415,6 +415,8 @@ static languageDefinition language_en = { whichPluralForm_en, {
|
|||
{ INSERTING_TAG, 0, "inserting implicit <%s>" }, /* Error */
|
||||
{ CANT_BE_NESTED, 0, "%s can't be nested" }, /* Error */
|
||||
{ PROPRIETARY_ELEMENT, 0, "%s is not approved by W3C" }, /* Error */
|
||||
{ ELEMENT_VERS_MISMATCH_ERROR, 0, "%s element not available in %s" }, /* Error */
|
||||
{ ELEMENT_VERS_MISMATCH_WARN, 0, "%s element not available in %s" }, /* Warning */
|
||||
{ ILLEGAL_NESTING, 0, "%s shouldn't be nested" }, /* Error */
|
||||
{ NOFRAMES_CONTENT, 0, "%s not inside 'noframes' element" }, /* Error */
|
||||
{ UNEXPECTED_END_OF_FILE, 0, "unexpected end of file %s" }, /* Error */
|
||||
|
@ -745,7 +747,9 @@ static languageDefinition language_en = { whichPluralForm_en, {
|
|||
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
|
||||
TidyDropPropAttrs, 0,
|
||||
"This option specifies if Tidy should strip out proprietary attributes, "
|
||||
"such as Microsoft data binding attributes. "
|
||||
"such as Microsoft data binding attributes. Additionally attributes "
|
||||
"that aren't permitted in the output version of HTML will be dropped "
|
||||
"if used with <code>strict-tags-attributes</code>. "
|
||||
},
|
||||
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
|
||||
It's very important that <br/> be self-closing in this manner!
|
||||
|
@ -1574,6 +1578,21 @@ static languageDefinition language_en = { whichPluralForm_en, {
|
|||
"This option specifies that Tidy should skip nested tags when parsing "
|
||||
"script and style data. "
|
||||
},
|
||||
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
|
||||
It's very important that <br/> be self-closing in this manner!
|
||||
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
|
||||
TidyStrictTagsAttr, 0,
|
||||
"This options ensures that tags and attributes are applicable for the "
|
||||
"version of HTML that Tidy outputs. When set to <code>yes</code> (the "
|
||||
"default) and the output document type is a strict doctype, then Tidy "
|
||||
"will report errors. If the output document type is a loose or "
|
||||
"transitional doctype, then Tidy will report warnings. "
|
||||
"<br/>"
|
||||
"Additionally if <code>drop-proprietary-attributes</code> is enabled, "
|
||||
"then not applicable attributes will be dropped, too. "
|
||||
"<br/>"
|
||||
"When set to <code>no</code>, these checks are not performed. "
|
||||
},
|
||||
|
||||
/********************************************************
|
||||
** Console Application
|
||||
|
|
22
src/lexer.c
22
src/lexer.c
|
@ -2753,26 +2753,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
}
|
||||
else if ( !cfgBool(doc, TidyXmlTags) )
|
||||
{
|
||||
Node* curr = lexer->token;
|
||||
TY_(ConstrainVersion)( doc, curr->tag->versions );
|
||||
|
||||
if ( curr->tag->versions & VERS_PROPRIETARY )
|
||||
{
|
||||
if ( !cfgBool(doc, TidyMakeClean) ||
|
||||
( !nodeIsNOBR(curr) && !nodeIsWBR(curr) ) )
|
||||
{
|
||||
TY_(ReportError)(doc, NULL, curr, PROPRIETARY_ELEMENT );
|
||||
|
||||
if ( nodeIsLAYER(curr) )
|
||||
doc->badLayout |= USING_LAYER;
|
||||
else if ( nodeIsSPACER(curr) )
|
||||
doc->badLayout |= USING_SPACER;
|
||||
else if ( nodeIsNOBR(curr) )
|
||||
doc->badLayout |= USING_NOBR;
|
||||
}
|
||||
}
|
||||
|
||||
TY_(RepairDuplicateAttributes)( doc, curr, no );
|
||||
TY_(ConstrainVersion)( doc, lexer->token->tag->versions );
|
||||
TY_(RepairDuplicateAttributes)( doc, lexer->token, no );
|
||||
} else
|
||||
TY_(RepairDuplicateAttributes)( doc, lexer->token, yes );
|
||||
#ifdef TIDY_STORE_ORIGINAL_TEXT
|
||||
|
|
|
@ -525,6 +525,8 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
|
|||
char const *name = "NULL", *value = "NULL";
|
||||
char tagdesc[64];
|
||||
ctmbstr fmt = tidyLocalizedString(code);
|
||||
uint version;
|
||||
ctmbstr extra_string;
|
||||
|
||||
assert( fmt != NULL );
|
||||
|
||||
|
@ -549,6 +551,22 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
|
|||
messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name);
|
||||
break;
|
||||
|
||||
case MISMATCHED_ATTRIBUTE_WARN:
|
||||
version = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
|
||||
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
|
||||
if (!extra_string)
|
||||
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
|
||||
messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name, extra_string);
|
||||
break;
|
||||
|
||||
case MISMATCHED_ATTRIBUTE_ERROR:
|
||||
version = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
|
||||
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
|
||||
if (!extra_string)
|
||||
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
|
||||
messageNode(doc, TidyError, code, node, fmt, tagdesc, name, extra_string);
|
||||
break;
|
||||
|
||||
case BAD_ATTRIBUTE_VALUE:
|
||||
case BAD_ATTRIBUTE_VALUE_REPLACED:
|
||||
case INVALID_ATTRIBUTE:
|
||||
|
@ -665,8 +683,6 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
|
|||
|
||||
case NESTED_EMPHASIS:
|
||||
case REMOVED_HTML5:
|
||||
case BAD_BODY_HTML5:
|
||||
case BAD_ALIGN_HTML5:
|
||||
case BAD_SUMMARY_HTML5:
|
||||
messageNode(doc, TidyWarning, code, rpt, fmt, nodedesc);
|
||||
break;
|
||||
|
@ -707,6 +723,8 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
|
|||
char elemdesc[ 256 ] = {0};
|
||||
Node* rpt = ( element ? element : node );
|
||||
ctmbstr fmt = tidyLocalizedString(code);
|
||||
uint versionEmitted, declared, version;
|
||||
ctmbstr extra_string = NULL;
|
||||
|
||||
assert( fmt != NULL );
|
||||
|
||||
|
@ -729,6 +747,26 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
|
|||
messageNode(doc, TidyWarning, code, node, fmt, nodedesc);
|
||||
break;
|
||||
|
||||
case ELEMENT_VERS_MISMATCH_WARN:
|
||||
versionEmitted = doc->lexer->versionEmitted;
|
||||
declared = doc->lexer->doctype;
|
||||
version = versionEmitted == 0 ? declared : versionEmitted;
|
||||
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
|
||||
if (!extra_string)
|
||||
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
|
||||
messageNode(doc, TidyWarning, code, node, fmt, nodedesc, extra_string);
|
||||
break;
|
||||
|
||||
case ELEMENT_VERS_MISMATCH_ERROR:
|
||||
versionEmitted = doc->lexer->versionEmitted;
|
||||
declared = doc->lexer->doctype;
|
||||
version = versionEmitted == 0 ? declared : versionEmitted;
|
||||
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
|
||||
if (!extra_string)
|
||||
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
|
||||
messageNode(doc, TidyError, code, node, fmt, nodedesc, extra_string);
|
||||
break;
|
||||
|
||||
case MISSING_TITLE_ELEMENT:
|
||||
case INCONSISTENT_VERSION:
|
||||
case MALFORMED_DOCTYPE:
|
||||
|
|
|
@ -98,6 +98,8 @@ typedef enum {
|
|||
CANT_BE_NESTED,
|
||||
OBSOLETE_ELEMENT,
|
||||
PROPRIETARY_ELEMENT,
|
||||
ELEMENT_VERS_MISMATCH_ERROR,
|
||||
ELEMENT_VERS_MISMATCH_WARN,
|
||||
UNKNOWN_ELEMENT,
|
||||
TRIM_EMPTY_ELEMENT,
|
||||
COERCE_TO_ENDTAG,
|
||||
|
@ -137,6 +139,8 @@ typedef enum {
|
|||
BAD_ATTRIBUTE_VALUE,
|
||||
UNEXPECTED_GT,
|
||||
PROPRIETARY_ATTRIBUTE,
|
||||
MISMATCHED_ATTRIBUTE_ERROR,
|
||||
MISMATCHED_ATTRIBUTE_WARN,
|
||||
PROPRIETARY_ATTR_VALUE,
|
||||
REPEATED_ATTRIBUTE,
|
||||
MISSING_IMAGEMAP,
|
||||
|
@ -168,8 +172,6 @@ typedef enum {
|
|||
WHITE_IN_URI,
|
||||
|
||||
REMOVED_HTML5, /* this element removed from HTML5 */
|
||||
BAD_BODY_HTML5, /* attr on body removed from HTML5 */
|
||||
BAD_ALIGN_HTML5, /* use of align attr removed from HTML5 */
|
||||
BAD_SUMMARY_HTML5, /* use of summary attr removed from HTML5 */
|
||||
|
||||
PREVIOUS_LOCATION, /* last */
|
||||
|
|
300
src/tidylib.c
300
src/tidylib.c
|
@ -1300,9 +1300,9 @@ void tidyDocReportDoctype( TidyDocImpl* doc )
|
|||
}
|
||||
|
||||
|
||||
/* ######################################################################################
|
||||
HTML5 STUFF
|
||||
*/
|
||||
/*****************************************************************************
|
||||
* HTML5 STUFF
|
||||
*****************************************************************************/
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
extern void show_not_html5(void);
|
||||
/* -----------------------------
|
||||
|
@ -1358,19 +1358,19 @@ Bool inRemovedInfo( uint tid )
|
|||
return no;
|
||||
}
|
||||
|
||||
static Bool BadBody5( Node* node )
|
||||
{
|
||||
if (TY_(AttrGetById)(node, TidyAttr_BACKGROUND) ||
|
||||
TY_(AttrGetById)(node, TidyAttr_BGCOLOR) ||
|
||||
TY_(AttrGetById)(node, TidyAttr_TEXT) ||
|
||||
TY_(AttrGetById)(node, TidyAttr_LINK) ||
|
||||
TY_(AttrGetById)(node, TidyAttr_VLINK) ||
|
||||
TY_(AttrGetById)(node, TidyAttr_ALINK))
|
||||
{
|
||||
return yes;
|
||||
}
|
||||
return no;
|
||||
}
|
||||
/* Things that should not be in an HTML5 body. This is special for CheckHTML5(),
|
||||
and we might just want to remove CheckHTML5()'s output altogether and count
|
||||
on the default --strict-tags-attributes.
|
||||
*/
|
||||
static BadBody5Attribs[] = {
|
||||
TidyAttr_BACKGROUND,
|
||||
TidyAttr_BGCOLOR,
|
||||
TidyAttr_TEXT,
|
||||
TidyAttr_LINK,
|
||||
TidyAttr_VLINK,
|
||||
TidyAttr_ALINK,
|
||||
TidyAttr_UNKNOWN /* Must be last! */
|
||||
};
|
||||
|
||||
static Bool nodeHasAlignAttr( Node *node )
|
||||
{
|
||||
|
@ -1383,47 +1383,66 @@ static Bool nodeHasAlignAttr( Node *node )
|
|||
return no;
|
||||
}
|
||||
|
||||
/* see http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete */
|
||||
|
||||
/*
|
||||
* Perform special checks for HTML, even when we're not using the default
|
||||
* option `--strict-tags-attributes yes`. This will ensure that HTML5 warning
|
||||
* and error output is given regardless of the new option, and ensure that
|
||||
* cleanup takes place. This provides mostly consistent Tidy behavior even with
|
||||
* the introduction of this new option. Note that strings have changed, though,
|
||||
* in order to maintain consistency with the `--strict-tags-attributes`
|
||||
* messages.
|
||||
*
|
||||
* See also: http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete
|
||||
*/
|
||||
void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
|
||||
{
|
||||
/* Lexer* lexer = doc->lexer; */
|
||||
Bool clean = cfgBool( doc, TidyMakeClean );
|
||||
Bool already_strict = cfgBool( doc, TidyStrictTagsAttr );
|
||||
Node* body = TY_(FindBody)( doc );
|
||||
Bool warn = yes; /* should this be a warning, error, or report??? */
|
||||
AttVal* attr = NULL;
|
||||
int i = 0;
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
// list_not_html5();
|
||||
// list_not_html5();
|
||||
#endif
|
||||
while (node)
|
||||
{
|
||||
if ( nodeHasAlignAttr( node ) ) {
|
||||
/*\
|
||||
* Is this for ALL elements that accept an 'align' attribute, or should
|
||||
* this be a sub-set test
|
||||
\*/
|
||||
TY_(ReportWarning)(doc, node, node, BAD_ALIGN_HTML5);
|
||||
/* @todo: Is this for ALL elements that accept an 'align' attribute,
|
||||
* or should this be a sub-set test?
|
||||
*/
|
||||
|
||||
/* We will only emit this message if `--strict-tags-attributes==no`;
|
||||
* otherwise if yes this message will be output during later
|
||||
* checking.
|
||||
*/
|
||||
if ( !already_strict )
|
||||
TY_(ReportAttrError)(doc, node, TY_(AttrGetById)(node, TidyAttr_ALIGN), MISMATCHED_ATTRIBUTE_WARN);
|
||||
}
|
||||
if ( node == body ) {
|
||||
if ( BadBody5(body) ) {
|
||||
/* perhaps need a new/different warning for this, like
|
||||
* The background 'attribute" on the body element is obsolete. Use CSS instead.
|
||||
* but how to pass an attribute name to be embedded in the message.
|
||||
\*/
|
||||
TY_(ReportWarning)(doc, node, body, BAD_BODY_HTML5);
|
||||
i = 0;
|
||||
/* We will only emit these messages if `--strict-tags-attributes==no`;
|
||||
* otherwise if yes these messages will be output during later
|
||||
* checking.
|
||||
*/
|
||||
if ( !already_strict ) {
|
||||
while ( BadBody5Attribs[i] != TidyAttr_UNKNOWN ) {
|
||||
attr = TY_(AttrGetById)(node, BadBody5Attribs[i]);
|
||||
if ( attr )
|
||||
TY_(ReportAttrError)(doc, node, attr , MISMATCHED_ATTRIBUTE_WARN);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
} else
|
||||
if ( nodeIsACRONYM(node) ) {
|
||||
if (clean) {
|
||||
/* replace with 'abbr' with warning to that effect
|
||||
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
|
||||
/* Replace with 'abbr' with warning to that effect.
|
||||
* Maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
|
||||
*/
|
||||
TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no);
|
||||
} else {
|
||||
/* sadly, this stops writing of the tidied document, unless 'forced'
|
||||
TY_(ReportError)(doc, node, node, REMOVED_HTML5);
|
||||
so go back to a 'warning' for now...
|
||||
*/
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
}
|
||||
} else
|
||||
if ( nodeIsAPPLET(node) ) {
|
||||
|
@ -1433,22 +1452,24 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
|
|||
*/
|
||||
TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no);
|
||||
} else {
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
}
|
||||
} else
|
||||
if ( nodeIsBASEFONT(node) ) {
|
||||
/*\
|
||||
* basefont: CSS equivalen 'font-size', 'font-family' and 'color' on body or class on each subsequent element
|
||||
* Difficult - If it is the first body element, then could consider adding that
|
||||
* to the <body> as a whole, else could perhaps apply it to all subsequent element.
|
||||
* But also in consideration is the fact that it was NOT supported in many browsers
|
||||
* For now just report a warning
|
||||
\*/
|
||||
/* basefont: CSS equivalent 'font-size', 'font-family' and 'color'
|
||||
* on body or class on each subsequent element.
|
||||
* Difficult - If it is the first body element, then could consider
|
||||
* adding that to the <body> as a whole, else could perhaps apply it
|
||||
* to all subsequent elements. But also in consideration is the fact
|
||||
* that it was NOT supported in many browsers.
|
||||
* - For now just report a warning
|
||||
*/
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
} else
|
||||
if ( nodeIsBIG(node) ) {
|
||||
/*\
|
||||
* big: CSS equivalent 'font-size:larger'
|
||||
/* big: CSS equivalent 'font-size:larger'
|
||||
* so could replace the <big> ... </big> with
|
||||
* <span style="font-size: larger"> ... </span>
|
||||
* then replace <big> with <span>
|
||||
|
@ -1461,84 +1482,82 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
|
|||
* Also maybe need a specific message like
|
||||
* Element '%s' replaced with 'span' with a 'font-size: larger style attribute
|
||||
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
|
||||
*
|
||||
\*/
|
||||
*/
|
||||
if (clean) {
|
||||
TY_(AddStyleProperty)( doc, node, "font-size: larger" );
|
||||
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
|
||||
} else {
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
}
|
||||
} else
|
||||
if ( nodeIsCENTER(node) ) {
|
||||
/*\
|
||||
* center: CSS equivalent 'text-align:center'
|
||||
* and 'margin-left:auto; margin-right:auto' on descendant blocks
|
||||
* Tidy already handles this if 'clean' by SILENTLY generating the <style>
|
||||
* and adding a <div class="c1"> around the elements.
|
||||
/* center: CSS equivalent 'text-align:center'
|
||||
* and 'margin-left:auto; margin-right:auto' on descendant blocks
|
||||
* Tidy already handles this if 'clean' by SILENTLY generating the
|
||||
* <style> and adding a <div class="c1"> around the elements.
|
||||
* see: static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
|
||||
\*/
|
||||
*/
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
} else
|
||||
if ( nodeIsDIR(node) ) {
|
||||
/*\
|
||||
* dir: replace by <ul>
|
||||
* Tidy already actions this and issues a warning
|
||||
* Should this be CHANGED???
|
||||
\*/
|
||||
/* dir: replace by <ul>
|
||||
* Tidy already actions this and issues a warning
|
||||
* Should this be CHANGED???
|
||||
*/
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
} else
|
||||
if ( nodeIsFONT(node) ) {
|
||||
/*\
|
||||
* Tidy already handles this -
|
||||
/* Tidy already handles this -
|
||||
* If 'clean' replaced by CSS, else
|
||||
* if is NOT clean, and doctype html5 then warnings issued
|
||||
* done in Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) (I think?)
|
||||
*
|
||||
\*/
|
||||
*/
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
} else
|
||||
if (( nodesIsFRAME(node) ) || ( nodeIsFRAMESET(node) ) || ( nodeIsNOFRAMES(node) )) {
|
||||
/*\
|
||||
* YOW: What to do here?????? Maybe <iframe>????
|
||||
\*/
|
||||
/* YOW: What to do here?????? Maybe <iframe>????
|
||||
*/
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
} else
|
||||
if ( nodeIsSTRIKE(node) ) {
|
||||
/*\
|
||||
* strike: CSS equivalent 'text-decoration:line-through'
|
||||
/* strike: CSS equivalent 'text-decoration:line-through'
|
||||
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
|
||||
\*/
|
||||
*/
|
||||
if (clean) {
|
||||
TY_(AddStyleProperty)( doc, node, "text-decoration: line-through" );
|
||||
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
|
||||
} else {
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
}
|
||||
} else
|
||||
if ( nodeIsTT(node) ) {
|
||||
/*\
|
||||
* tt: CSS equivalent 'font-family:monospace'
|
||||
/* tt: CSS equivalent 'font-family:monospace'
|
||||
* Tidy presently does nothing. Tidy5 issues a warning
|
||||
* But like the 'clean' <font> replacement this could also be replaced with CSS
|
||||
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
|
||||
*
|
||||
\*/
|
||||
*/
|
||||
if (clean) {
|
||||
TY_(AddStyleProperty)( doc, node, "font-family: monospace" );
|
||||
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
|
||||
} else {
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
}
|
||||
} else
|
||||
if (TY_(nodeIsElement)(node)) {
|
||||
if (node->tag) {
|
||||
if ((!(node->tag->versions & VERS_HTML5))||(inRemovedInfo(node->tag->id))) {
|
||||
/* issue warning for elements like 'markquee' */
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
if (TY_(nodeIsElement)(node)) {
|
||||
if (node->tag) {
|
||||
if ( (!(node->tag->versions & VERS_HTML5) && !(node->tag->versions & VERS_PROPRIETARY)) || (inRemovedInfo(node->tag->id)) ) {
|
||||
if ( !already_strict )
|
||||
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (node->content)
|
||||
TY_(CheckHTML5)( doc, node->content );
|
||||
|
@ -1546,9 +1565,102 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
|
|||
node = node->next;
|
||||
}
|
||||
}
|
||||
/* END HTML5 STUFF
|
||||
######################################################################################
|
||||
/*****************************************************************************
|
||||
* END HTML5 STUFF
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
/*
|
||||
* Check and report HTML tags and attributes that are:
|
||||
* - Proprietary, and/or
|
||||
* - Not supported in the current version of HTML, defined as the version
|
||||
* of HTML that we are emitting.
|
||||
* Proprietary items are reported as WARNINGS, and version mismatches will
|
||||
* be reported as WARNING or ERROR in the following conditions:
|
||||
* - ERROR if the emitted doctype is a strict doctype.
|
||||
* - WARNING if the emitted doctype is a non-strict doctype.
|
||||
* The propriety checks are *always* run as they have always been an integral
|
||||
* part of Tidy. The version checks are controlled by `strict-tags-attributes`.
|
||||
*/
|
||||
void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node )
|
||||
{
|
||||
uint versionEmitted = doc->lexer->versionEmitted;
|
||||
uint declared = doc->lexer->doctype;
|
||||
uint version = versionEmitted == 0 ? declared : versionEmitted;
|
||||
int tagReportType = VERS_STRICT & version ? ELEMENT_VERS_MISMATCH_ERROR : ELEMENT_VERS_MISMATCH_WARN;
|
||||
int attrReportType = VERS_STRICT & version ? MISMATCHED_ATTRIBUTE_ERROR : MISMATCHED_ATTRIBUTE_WARN;
|
||||
Bool check_versions = cfgBool( doc, TidyStrictTagsAttr );
|
||||
AttVal *next_attr, *attval;
|
||||
Bool attrIsProprietary = no;
|
||||
Bool attrIsMismatched = yes;
|
||||
|
||||
while (node)
|
||||
{
|
||||
/* This bit here handles our HTML tags */
|
||||
if ( TY_(nodeIsElement)(node) && node->tag ) {
|
||||
|
||||
/* Leave XML stuff alone. */
|
||||
if ( !cfgBool(doc, TidyXmlTags) )
|
||||
{
|
||||
/* Version mismatches take priority. */
|
||||
if ( check_versions && !(node->tag->versions & version) )
|
||||
{
|
||||
TY_(ReportError)(doc, NULL, node, tagReportType );
|
||||
}
|
||||
/* If it's not mismatched, it could still be proprietary. */
|
||||
else if ( node->tag->versions & VERS_PROPRIETARY )
|
||||
{
|
||||
if ( !cfgBool(doc, TidyMakeClean) ||
|
||||
( !nodeIsNOBR(node) && !nodeIsWBR(node) ) )
|
||||
{
|
||||
TY_(ReportError)(doc, NULL, node, PROPRIETARY_ELEMENT );
|
||||
|
||||
if ( nodeIsLAYER(node) )
|
||||
doc->badLayout |= USING_LAYER;
|
||||
else if ( nodeIsSPACER(node) )
|
||||
doc->badLayout |= USING_SPACER;
|
||||
else if ( nodeIsNOBR(node) )
|
||||
doc->badLayout |= USING_NOBR;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* And this bit here handles our attributes */
|
||||
if (TY_(nodeIsElement)(node))
|
||||
{
|
||||
attval = node->attributes;
|
||||
|
||||
while (attval)
|
||||
{
|
||||
next_attr = attval->next;
|
||||
|
||||
attrIsProprietary = TY_(AttributeIsProprietary)(node, attval);
|
||||
attrIsMismatched = check_versions ? TY_(AttributeIsMismatched)(node, attval, doc) : no;
|
||||
/* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */
|
||||
if ( attrIsProprietary )
|
||||
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
|
||||
else if ( attrIsMismatched )
|
||||
{
|
||||
TY_(ReportAttrError)(doc, node, attval, attrReportType);
|
||||
}
|
||||
|
||||
/* @todo: do we need a new option to drop mismatches? Or should we
|
||||
simply drop them? */
|
||||
if ( ( attrIsProprietary || attrIsMismatched ) && cfgBool(doc, TidyDropPropAttrs) )
|
||||
TY_(RemoveAttribute)( doc, node, attval );
|
||||
|
||||
attval = next_attr;
|
||||
}
|
||||
}
|
||||
|
||||
if (node->content)
|
||||
TY_(CheckHTMLTagsAttribsVersions)( doc, node->content );
|
||||
|
||||
node = node->next;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
/* *** FOR DEBUG ONLY *** */
|
||||
|
@ -1686,7 +1798,6 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
|||
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
|
||||
Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
|
||||
Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
|
||||
ctmbstr sdef = NULL;
|
||||
Node* node;
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
|
@ -1747,12 +1858,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
|||
|
||||
/* remember given doctype for reporting */
|
||||
node = TY_(FindDocType)(doc);
|
||||
sdef = tidyOptGetValue((TidyDoc)doc, TidyDoctype );
|
||||
if (!sdef)
|
||||
sdef = tidyOptGetCurrPick((TidyDoc) doc, TidyDoctypeMode );
|
||||
if (sdef && (strcmp(sdef,"html5") == 0)) {
|
||||
TY_(CheckHTML5)( doc, &doc->root );
|
||||
}
|
||||
|
||||
if (node)
|
||||
{
|
||||
AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
|
||||
|
@ -1798,6 +1904,14 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
|
|||
if ( xmlOut && xmlDecl )
|
||||
TY_(FixXmlDecl)( doc );
|
||||
|
||||
/* At this point the apparent doctype is going to be as stable as
|
||||
it can ever be, so we can start detecting things that shouldn't
|
||||
be in this version of HTML
|
||||
*/
|
||||
if (doc->lexer->versionEmitted & VERS_HTML5)
|
||||
TY_(CheckHTML5)( doc, &doc->root );
|
||||
TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root );
|
||||
|
||||
#if !defined(NDEBUG) && defined(_MSC_VER)
|
||||
SPRTF("All nodes AFTER clean and repair\n");
|
||||
dbg_show_all_nodes( doc, &doc->root, 0 );
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
5.1.38
|
||||
5.1.39
|
||||
2016.02.16
|
||||
|
||||
|
|
Loading…
Reference in a new issue