Merge pull request #372 from htacg/attrdict_phase2

Attrdict phase2 - enforce strict tags and attributes
This commit is contained in:
Jim Derry 2016-02-16 11:12:32 +08:00
commit 468cc02cf3
11 changed files with 347 additions and 144 deletions

View file

@ -479,11 +479,12 @@ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id )
return VERS_UNKNOWN;
}
/* returns true if the element is a W3C defined element */
/* but the element/attribute combination is not. We're */
/* only defining as "proprietary" items that are not in */
/* the element's AttrVersion structure. */
static Bool AttributeIsProprietary(Node* node, AttVal* attval)
/* returns true if the element is a W3C defined element
* but the element/attribute combination is not. We're
* only defining as "proprietary" items that are not in
* the element's AttrVersion structure.
*/
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval)
{
if (!node || !attval)
return no;
@ -500,6 +501,34 @@ static Bool AttributeIsProprietary(Node* node, AttVal* attval)
return yes;
}
/* Reports whether an attribute is out of place for the HTML version
 * Tidy is working with.
 *
 * The version used is the lexer's emitted version, or the declared
 * doctype when no version has been emitted yet (versionEmitted == 0).
 *
 * Returns `no` when:
 *  - node, attval or the node's tag dictionary entry is missing;
 *  - the tag has no version bits within VERS_ALL (i.e., per the
 *    original note, it is not a W3C defined element);
 *  - the element/attribute combination is allowed in that version.
 * Returns `yes` otherwise.
 */
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc)
{
    uint target;

    if (node == NULL || attval == NULL || node->tag == NULL)
        return no;

    if ((node->tag->versions & VERS_ALL) == 0)
        return no;

    /* Prefer the version being emitted; fall back to the declared one. */
    target = doc->lexer->versionEmitted;
    if (target == 0)
        target = doc->lexer->doctype;

    return (AttributeVersions(node, attval) & target) ? no : yes;
}
/* used by CheckColor() */
struct _colors
{
@ -1358,14 +1387,6 @@ const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attv
attribute->attrchk( doc, node, attval );
}
if (AttributeIsProprietary(node, attval))
{
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
if (cfgBool(doc, TidyDropPropAttrs))
TY_(RemoveAttribute)( doc, node, attval );
}
return attribute;
}

View file

@ -147,6 +147,10 @@ AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id );
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval);
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc);
/* 0 == TidyAttr_UNKNOWN */
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN)
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == atid))

View file

@ -1917,6 +1917,7 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
/* used to build a list from a sequence of bulleted p's */
Lexer* lexer = doc->lexer;
Node* list = NULL;
AttVal *next_attr, *attval;
while ( node )
{
@ -1928,6 +1929,19 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
!cfgBool(doc, TidyMakeBare) )
return;
/* Output proprietary attributes to maintain errout compatibility
* with traditional Tidy. This is a result of moving all of the
* proprietary checks to near the end of the cleanup process,
* meaning this result would not ordinarily be displayed.
*/
attval = node->attributes;
while ( attval ) {
next_attr = attval->next;
if ( strcmp(attval->attribute, "xmlns") != 0 )
TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
attval = next_attr;
}
TY_(FreeAttrs)( doc, node );
}
@ -2001,6 +2015,12 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
/* discards <o:p> which encodes the paragraph mark */
if ( node->tag && TY_(tmbstrcmp)(node->tag->name,"o:p")==0)
{
/* Output proprietary elements to maintain errout compatibility
* with traditional Tidy. This is a result of moving all of the
* proprietary checks to near the end of the cleanup process,
* meaning this result would not ordinarily be displayed.
*/
TY_(ReportError)(doc, NULL, node, PROPRIETARY_ELEMENT);
Node* next;
DiscardContainer( doc, node, &next );
node = next;

View file

@ -322,7 +322,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyAnchorAsName, MU, "anchor-as-name", BL, yes, ParseBool, boolPicks },
{ TidyPPrintTabs, PP, "indent-with-tabs", BL, no, ParseTabs, boolPicks }, /* 20150515 - Issue #108 */
{ TidySkipNested, MU, "skip-nested", BL, yes, ParseBool, boolPicks }, /* 1642186 - Issue #65 */
{ TidyStrictTagsAttr, MU, "strict-tags-attributes", BL, yes, ParseBool, boolPicks }, /* 20160209 - Issue #350 */
{ TidyStrictTagsAttr, MU, "strict-tags-attributes", BL, no, ParseBool, boolPicks }, /* 20160209 - Issue #350 */
{ N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL }
};

View file

@ -262,6 +262,8 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "CANT_BE_NESTED", CANT_BE_NESTED },
{ "OBSOLETE_ELEMENT", OBSOLETE_ELEMENT },
{ "PROPRIETARY_ELEMENT", PROPRIETARY_ELEMENT },
{ "ELEMENT_VERS_MISMATCH_ERROR", ELEMENT_VERS_MISMATCH_ERROR },
{ "ELEMENT_VERS_MISMATCH_WARN", ELEMENT_VERS_MISMATCH_WARN },
{ "UNKNOWN_ELEMENT", UNKNOWN_ELEMENT },
{ "TRIM_EMPTY_ELEMENT", TRIM_EMPTY_ELEMENT },
{ "COERCE_TO_ENDTAG", COERCE_TO_ENDTAG },
@ -298,6 +300,8 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "BAD_ATTRIBUTE_VALUE", BAD_ATTRIBUTE_VALUE },
{ "UNEXPECTED_GT", UNEXPECTED_GT },
{ "PROPRIETARY_ATTRIBUTE", PROPRIETARY_ATTRIBUTE },
{ "MISMATCHED_ATTRIBUTE_ERROR", MISMATCHED_ATTRIBUTE_ERROR },
{ "MISMATCHED_ATTRIBUTE_WARN", MISMATCHED_ATTRIBUTE_WARN },
{ "PROPRIETARY_ATTR_VALUE", PROPRIETARY_ATTR_VALUE },
{ "REPEATED_ATTRIBUTE", REPEATED_ATTRIBUTE },
{ "MISSING_IMAGEMAP", MISSING_IMAGEMAP },
@ -322,8 +326,6 @@ static const tidyErrorFilterKeyItem tidyErrorFilterKeysStruct[] = {
{ "MISSING_ATTRIBUTE", MISSING_ATTRIBUTE },
{ "WHITE_IN_URI", WHITE_IN_URI },
{ "REMOVED_HTML5", REMOVED_HTML5 },
{ "BAD_BODY_HTML5", BAD_BODY_HTML5 },
{ "BAD_ALIGN_HTML5", BAD_ALIGN_HTML5 },
{ "BAD_SUMMARY_HTML5", BAD_SUMMARY_HTML5 },
{ "PREVIOUS_LOCATION", PREVIOUS_LOCATION },
{ "VENDOR_SPECIFIC_CHARS", VENDOR_SPECIFIC_CHARS },

View file

@ -350,6 +350,8 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ MISSING_ATTR_VALUE, 0, "%s attribute \"%s\" lacks value" }, /* Warning in CheckUrl, Error otherwise */
{ UNKNOWN_ATTRIBUTE, 0, "%s unknown attribute \"%s\"" }, /* Error */
{ PROPRIETARY_ATTRIBUTE, 0, "%s proprietary attribute \"%s\"" }, /* Error */
{ MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" }, /* Error */
{ MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" }, /* Warning */
{ JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" }, /* Error */
{ XML_ATTRIBUTE_VALUE, 0, "%s has XML attribute \"%s\"" }, /* Error (but deprecated) */
@ -392,8 +394,6 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ OBSOLETE_ELEMENT, 0, "replacing obsolete element %s with %s" }, /* Warning */
{ COERCE_TO_ENDTAG_WARN, 0, "<%s> is probably intended as </%s>" }, /* Warning */
{ REMOVED_HTML5, 0, "%s element removed from HTML5" }, /* Warning */
{ BAD_BODY_HTML5, 0, "Found attribute on body that is obsolete in HTML5. Use CSS" }, /* Warning */
{ BAD_ALIGN_HTML5, 0, "The align attribute on the %s element is obsolete. Use CSS" }, /* Warning */
{ BAD_SUMMARY_HTML5, 0, "The summary attribute on the %s element is obsolete in HTML5" }, /* Warning */
/* ReportNotice */
@ -415,6 +415,8 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ INSERTING_TAG, 0, "inserting implicit <%s>" }, /* Error */
{ CANT_BE_NESTED, 0, "%s can't be nested" }, /* Error */
{ PROPRIETARY_ELEMENT, 0, "%s is not approved by W3C" }, /* Error */
{ ELEMENT_VERS_MISMATCH_ERROR, 0, "%s element not available in %s" }, /* Error */
{ ELEMENT_VERS_MISMATCH_WARN, 0, "%s element not available in %s" }, /* Warning */
{ ILLEGAL_NESTING, 0, "%s shouldn't be nested" }, /* Error */
{ NOFRAMES_CONTENT, 0, "%s not inside 'noframes' element" }, /* Error */
{ UNEXPECTED_END_OF_FILE, 0, "unexpected end of file %s" }, /* Error */
@ -745,7 +747,9 @@ static languageDefinition language_en = { whichPluralForm_en, {
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyDropPropAttrs, 0,
"This option specifies if Tidy should strip out proprietary attributes, "
"such as Microsoft data binding attributes. "
"such as Microsoft data binding attributes. Additionally attributes "
"that aren't permitted in the output version of HTML will be dropped "
"if used with <code>strict-tags-attributes</code>. "
},
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
It's very important that <br/> be self-closing in this manner!
@ -1574,6 +1578,21 @@ static languageDefinition language_en = { whichPluralForm_en, {
"This option specifies that Tidy should skip nested tags when parsing "
"script and style data. "
},
{/* Please use _only_ <code></code>, <em></em>, <strong></strong>, and <br/>.
It's very important that <br/> be self-closing in this manner!
- The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */
TidyStrictTagsAttr, 0,
"This option ensures that tags and attributes are applicable for the "
"version of HTML that Tidy outputs. When set to <code>yes</code> and "
"the output document type is a strict doctype, then Tidy "
"will report errors. If the output document type is a loose or "
"transitional doctype, then Tidy will report warnings. "
"<br/>"
"Additionally if <code>drop-proprietary-attributes</code> is enabled, "
"then not applicable attributes will be dropped, too. "
"<br/>"
"When set to <code>no</code> (the default), these checks are not performed. "
},
/********************************************************
** Console Application

View file

@ -2753,26 +2753,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
}
else if ( !cfgBool(doc, TidyXmlTags) )
{
Node* curr = lexer->token;
TY_(ConstrainVersion)( doc, curr->tag->versions );
if ( curr->tag->versions & VERS_PROPRIETARY )
{
if ( !cfgBool(doc, TidyMakeClean) ||
( !nodeIsNOBR(curr) && !nodeIsWBR(curr) ) )
{
TY_(ReportError)(doc, NULL, curr, PROPRIETARY_ELEMENT );
if ( nodeIsLAYER(curr) )
doc->badLayout |= USING_LAYER;
else if ( nodeIsSPACER(curr) )
doc->badLayout |= USING_SPACER;
else if ( nodeIsNOBR(curr) )
doc->badLayout |= USING_NOBR;
}
}
TY_(RepairDuplicateAttributes)( doc, curr, no );
TY_(ConstrainVersion)( doc, lexer->token->tag->versions );
TY_(RepairDuplicateAttributes)( doc, lexer->token, no );
} else
TY_(RepairDuplicateAttributes)( doc, lexer->token, yes );
#ifdef TIDY_STORE_ORIGINAL_TEXT

View file

@ -525,6 +525,8 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
char const *name = "NULL", *value = "NULL";
char tagdesc[64];
ctmbstr fmt = tidyLocalizedString(code);
uint version;
ctmbstr extra_string;
assert( fmt != NULL );
@ -549,6 +551,22 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name);
break;
case MISMATCHED_ATTRIBUTE_WARN:
version = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
if (!extra_string)
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
messageNode(doc, TidyWarning, code, node, fmt, tagdesc, name, extra_string);
break;
case MISMATCHED_ATTRIBUTE_ERROR:
version = doc->lexer->versionEmitted == 0 ? doc->lexer->doctype : doc->lexer->versionEmitted;
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
if (!extra_string)
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
messageNode(doc, TidyError, code, node, fmt, tagdesc, name, extra_string);
break;
case BAD_ATTRIBUTE_VALUE:
case BAD_ATTRIBUTE_VALUE_REPLACED:
case INVALID_ATTRIBUTE:
@ -665,8 +683,6 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
case NESTED_EMPHASIS:
case REMOVED_HTML5:
case BAD_BODY_HTML5:
case BAD_ALIGN_HTML5:
case BAD_SUMMARY_HTML5:
messageNode(doc, TidyWarning, code, rpt, fmt, nodedesc);
break;
@ -707,6 +723,8 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
char elemdesc[ 256 ] = {0};
Node* rpt = ( element ? element : node );
ctmbstr fmt = tidyLocalizedString(code);
uint versionEmitted, declared, version;
ctmbstr extra_string = NULL;
assert( fmt != NULL );
@ -729,6 +747,26 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
messageNode(doc, TidyWarning, code, node, fmt, nodedesc);
break;
case ELEMENT_VERS_MISMATCH_WARN:
versionEmitted = doc->lexer->versionEmitted;
declared = doc->lexer->doctype;
version = versionEmitted == 0 ? declared : versionEmitted;
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
if (!extra_string)
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
messageNode(doc, TidyWarning, code, node, fmt, nodedesc, extra_string);
break;
case ELEMENT_VERS_MISMATCH_ERROR:
versionEmitted = doc->lexer->versionEmitted;
declared = doc->lexer->doctype;
version = versionEmitted == 0 ? declared : versionEmitted;
extra_string = TY_(HTMLVersionNameFromCode)(version, 0);
if (!extra_string)
extra_string = tidyLocalizedString(STRING_HTML_PROPRIETARY);
messageNode(doc, TidyError, code, node, fmt, nodedesc, extra_string);
break;
case MISSING_TITLE_ELEMENT:
case INCONSISTENT_VERSION:
case MALFORMED_DOCTYPE:

View file

@ -98,6 +98,8 @@ typedef enum {
CANT_BE_NESTED,
OBSOLETE_ELEMENT,
PROPRIETARY_ELEMENT,
ELEMENT_VERS_MISMATCH_ERROR,
ELEMENT_VERS_MISMATCH_WARN,
UNKNOWN_ELEMENT,
TRIM_EMPTY_ELEMENT,
COERCE_TO_ENDTAG,
@ -137,6 +139,8 @@ typedef enum {
BAD_ATTRIBUTE_VALUE,
UNEXPECTED_GT,
PROPRIETARY_ATTRIBUTE,
MISMATCHED_ATTRIBUTE_ERROR,
MISMATCHED_ATTRIBUTE_WARN,
PROPRIETARY_ATTR_VALUE,
REPEATED_ATTRIBUTE,
MISSING_IMAGEMAP,
@ -168,8 +172,6 @@ typedef enum {
WHITE_IN_URI,
REMOVED_HTML5, /* this element removed from HTML5 */
BAD_BODY_HTML5, /* attr on body removed from HTML5 */
BAD_ALIGN_HTML5, /* use of align attr removed from HTML5 */
BAD_SUMMARY_HTML5, /* use of summary attr removed from HTML5 */
PREVIOUS_LOCATION, /* last */

View file

@ -1300,9 +1300,9 @@ void tidyDocReportDoctype( TidyDocImpl* doc )
}
/* ######################################################################################
HTML5 STUFF
*/
/*****************************************************************************
* HTML5 STUFF
*****************************************************************************/
#if !defined(NDEBUG) && defined(_MSC_VER)
extern void show_not_html5(void);
/* -----------------------------
@ -1358,19 +1358,19 @@ Bool inRemovedInfo( uint tid )
return no;
}
static Bool BadBody5( Node* node )
{
if (TY_(AttrGetById)(node, TidyAttr_BACKGROUND) ||
TY_(AttrGetById)(node, TidyAttr_BGCOLOR) ||
TY_(AttrGetById)(node, TidyAttr_TEXT) ||
TY_(AttrGetById)(node, TidyAttr_LINK) ||
TY_(AttrGetById)(node, TidyAttr_VLINK) ||
TY_(AttrGetById)(node, TidyAttr_ALINK))
{
return yes;
}
return no;
}
/* Attributes that should not appear on an HTML5 <body>. This table is
   special for CheckHTML5(), and we might just want to remove
   CheckHTML5()'s output altogether and count on
   --strict-tags-attributes instead.

   The entries are TidyAttr_* ids. The list is terminated by
   TidyAttr_UNKNOWN, which consumers use as the end-of-table sentinel.
   The element type is spelled out explicitly: "implicit int"
   declarations are not valid since C99.
*/
static const int BadBody5Attribs[] = {
    TidyAttr_BACKGROUND,
    TidyAttr_BGCOLOR,
    TidyAttr_TEXT,
    TidyAttr_LINK,
    TidyAttr_VLINK,
    TidyAttr_ALINK,
    TidyAttr_UNKNOWN  /* Must be last! */
};
static Bool nodeHasAlignAttr( Node *node )
{
@ -1383,46 +1383,65 @@ static Bool nodeHasAlignAttr( Node *node )
return no;
}
/* see http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete */
/*
* Perform special checks for HTML5, even when the option
* `--strict-tags-attributes yes` is not in effect. This will ensure that HTML5 warning
* and error output is given regardless of the new option, and ensure that
* cleanup takes place. This provides mostly consistent Tidy behavior even with
* the introduction of this new option. Note that strings have changed, though,
* in order to maintain consistency with the `--strict-tags-attributes`
* messages.
*
* See also: http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete
*/
void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
{
/* Lexer* lexer = doc->lexer; */
Bool clean = cfgBool( doc, TidyMakeClean );
Bool already_strict = cfgBool( doc, TidyStrictTagsAttr );
Node* body = TY_(FindBody)( doc );
Bool warn = yes; /* should this be a warning, error, or report??? */
AttVal* attr = NULL;
int i = 0;
#if !defined(NDEBUG) && defined(_MSC_VER)
// list_not_html5();
#endif
while (node)
{
if ( nodeHasAlignAttr( node ) ) {
/*\
* Is this for ALL elements that accept an 'align' attribute, or should
* this be a sub-set test
\*/
TY_(ReportWarning)(doc, node, node, BAD_ALIGN_HTML5);
/* @todo: Is this for ALL elements that accept an 'align' attribute,
* or should this be a sub-set test?
*/
/* We will only emit this message if `--strict-tags-attributes==no`;
* otherwise if yes this message will be output during later
* checking.
*/
if ( !already_strict )
TY_(ReportAttrError)(doc, node, TY_(AttrGetById)(node, TidyAttr_ALIGN), MISMATCHED_ATTRIBUTE_WARN);
}
if ( node == body ) {
if ( BadBody5(body) ) {
/* perhaps need a new/different warning for this, like
* The background 'attribute" on the body element is obsolete. Use CSS instead.
* but how to pass an attribute name to be embedded in the message.
\*/
TY_(ReportWarning)(doc, node, body, BAD_BODY_HTML5);
i = 0;
/* We will only emit these messages if `--strict-tags-attributes==no`;
* otherwise if yes these messages will be output during later
* checking.
*/
if ( !already_strict ) {
while ( BadBody5Attribs[i] != TidyAttr_UNKNOWN ) {
attr = TY_(AttrGetById)(node, BadBody5Attribs[i]);
if ( attr )
TY_(ReportAttrError)(doc, node, attr , MISMATCHED_ATTRIBUTE_WARN);
i++;
}
}
} else
if ( nodeIsACRONYM(node) ) {
if (clean) {
/* replace with 'abbr' with warning to that effect
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
/* Replace with 'abbr' with warning to that effect.
* Maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*/
TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no);
} else {
/* sadly, this stops writing of the tidied document, unless 'forced'
TY_(ReportError)(doc, node, node, REMOVED_HTML5);
so go back to a 'warning' for now...
*/
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
@ -1433,22 +1452,24 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
*/
TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no);
} else {
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if ( nodeIsBASEFONT(node) ) {
/*\
* basefont: CSS equivalen 'font-size', 'font-family' and 'color' on body or class on each subsequent element
* Difficult - If it is the first body element, then could consider adding that
* to the <body> as a whole, else could perhaps apply it to all subsequent element.
* But also in consideration is the fact that it was NOT supported in many browsers
* For now just report a warning
\*/
/* basefont: CSS equivalent 'font-size', 'font-family' and 'color'
* on body or class on each subsequent element.
* Difficult - If it is the first body element, then could consider
* adding that to the <body> as a whole, else could perhaps apply it
* to all subsequent elements. But also in consideration is the fact
* that it was NOT supported in many browsers.
* - For now just report a warning
*/
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if ( nodeIsBIG(node) ) {
/*\
* big: CSS equivalent 'font-size:larger'
/* big: CSS equivalent 'font-size:larger'
* so could replace the <big> ... </big> with
* <span style="font-size: larger"> ... </span>
* then replace <big> with <span>
@ -1461,80 +1482,78 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
* Also maybe need a specific message like
* Element '%s' replaced with 'span' with a 'font-size: larger style attribute
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*
\*/
*/
if (clean) {
TY_(AddStyleProperty)( doc, node, "font-size: larger" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else {
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if ( nodeIsCENTER(node) ) {
/*\
* center: CSS equivalent 'text-align:center'
/* center: CSS equivalent 'text-align:center'
* and 'margin-left:auto; margin-right:auto' on descendant blocks
* Tidy already handles this if 'clean' by SILENTLY generating the <style>
* and adding a <div class="c1"> around the elements.
* Tidy already handles this if 'clean' by SILENTLY generating the
* <style> and adding a <div class="c1"> around the elements.
* see: static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
\*/
*/
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if ( nodeIsDIR(node) ) {
/*\
* dir: replace by <ul>
/* dir: replace by <ul>
* Tidy already actions this and issues a warning
* Should this be CHANGED???
\*/
*/
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if ( nodeIsFONT(node) ) {
/*\
* Tidy already handles this -
/* Tidy already handles this -
* If 'clean' replaced by CSS, else
* if is NOT clean, and doctype html5 then warnings issued
* done in Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) (I think?)
*
\*/
*/
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if (( nodesIsFRAME(node) ) || ( nodeIsFRAMESET(node) ) || ( nodeIsNOFRAMES(node) )) {
/*\
* YOW: What to do here?????? Maybe <iframe>????
\*/
/* YOW: What to do here?????? Maybe <iframe>????
*/
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if ( nodeIsSTRIKE(node) ) {
/*\
* strike: CSS equivalent 'text-decoration:line-through'
/* strike: CSS equivalent 'text-decoration:line-through'
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
\*/
*/
if (clean) {
TY_(AddStyleProperty)( doc, node, "text-decoration: line-through" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else {
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if ( nodeIsTT(node) ) {
/*\
* tt: CSS equivalent 'font-family:monospace'
/* tt: CSS equivalent 'font-family:monospace'
* Tidy presently does nothing. Tidy5 issues a warning
* But like the 'clean' <font> replacement this could also be replaced with CSS
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*
\*/
*/
if (clean) {
TY_(AddStyleProperty)( doc, node, "font-family: monospace" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else {
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if (TY_(nodeIsElement)(node)) {
if (node->tag) {
if ((!(node->tag->versions & VERS_HTML5))||(inRemovedInfo(node->tag->id))) {
/* issue warning for elements like 'markquee' */
if ( (!(node->tag->versions & VERS_HTML5) && !(node->tag->versions & VERS_PROPRIETARY)) || (inRemovedInfo(node->tag->id)) ) {
if ( !already_strict )
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
}
@ -1546,9 +1565,102 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
node = node->next;
}
}
/* END HTML5 STUFF
######################################################################################
/*****************************************************************************
* END HTML5 STUFF
*****************************************************************************/
/*
 * Walks `node`, its following siblings and all of their descendants,
 * reporting HTML tags and attributes that are:
 *  - proprietary, and/or
 *  - not supported in the current version of HTML, i.e. the version
 *    being emitted (or the declared doctype when nothing has been
 *    emitted yet).
 * Proprietary items are reported as PROPRIETARY_ELEMENT or
 * PROPRIETARY_ATTRIBUTE. Version mismatches are reported with the
 *  - *_ERROR codes if that version is a strict doctype;
 *  - *_WARN codes otherwise.
 * The proprietary checks are *always* run; the version checks only run
 * when `strict-tags-attributes` is enabled. When
 * `drop-proprietary-attributes` is enabled, every attribute flagged by
 * either check is removed from its element.
 */
void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node )
{
    uint emitted = doc->lexer->versionEmitted;
    uint version = emitted != 0 ? emitted : doc->lexer->doctype;
    Bool strictVersion = ( version & VERS_STRICT ) ? yes : no;
    int tagReportType  = strictVersion ? ELEMENT_VERS_MISMATCH_ERROR
                                       : ELEMENT_VERS_MISMATCH_WARN;
    int attrReportType = strictVersion ? MISMATCHED_ATTRIBUTE_ERROR
                                       : MISMATCHED_ATTRIBUTE_WARN;
    Bool checkVersions = cfgBool( doc, TidyStrictTagsAttr );
    Bool xmlTags       = cfgBool( doc, TidyXmlTags );
    Bool makeClean     = cfgBool( doc, TidyMakeClean );
    Bool dropAttrs     = cfgBool( doc, TidyDropPropAttrs );

    for ( ; node != NULL; node = node->next )
    {
        Bool isElement = TY_(nodeIsElement)(node);

        /* Tags: only known elements are checked; XML stuff is left alone. */
        if ( isElement && node->tag && !xmlTags )
        {
            if ( checkVersions && ( node->tag->versions & version ) == 0 )
            {
                /* Version mismatches take priority. */
                TY_(ReportError)(doc, NULL, node, tagReportType );
            }
            else if ( ( node->tag->versions & VERS_PROPRIETARY ) &&
                      ( !makeClean ||
                        ( !nodeIsNOBR(node) && !nodeIsWBR(node) ) ) )
            {
                /* Not mismatched, but still proprietary. */
                TY_(ReportError)(doc, NULL, node, PROPRIETARY_ELEMENT );

                if ( nodeIsLAYER(node) )
                    doc->badLayout |= USING_LAYER;
                else if ( nodeIsSPACER(node) )
                    doc->badLayout |= USING_SPACER;
                else if ( nodeIsNOBR(node) )
                    doc->badLayout |= USING_NOBR;
            }
        }

        /* Attributes. */
        if ( isElement )
        {
            AttVal *attval, *following;

            for ( attval = node->attributes; attval != NULL; attval = following )
            {
                Bool proprietary, mismatched;

                /* Remember the successor now: attval may be removed below. */
                following = attval->next;

                proprietary = TY_(AttributeIsProprietary)(node, attval);
                mismatched  = no;
                if ( checkVersions )
                    mismatched = TY_(AttributeIsMismatched)(node, attval, doc);

                /* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */
                if ( proprietary )
                    TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
                else if ( mismatched )
                    TY_(ReportAttrError)(doc, node, attval, attrReportType);

                /* @todo: do we need a new option to drop mismatches? Or
                   should we simply drop them? */
                if ( ( proprietary || mismatched ) && dropAttrs )
                    TY_(RemoveAttribute)( doc, node, attval );
            }
        }

        /* Descend into children before moving on to the next sibling. */
        if ( node->content )
            TY_(CheckHTMLTagsAttribsVersions)( doc, node->content );
    }
}
#if !defined(NDEBUG) && defined(_MSC_VER)
/* *** FOR DEBUG ONLY *** */
@ -1686,7 +1798,6 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
ctmbstr sdef = NULL;
Node* node;
#if !defined(NDEBUG) && defined(_MSC_VER)
@ -1747,12 +1858,7 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
/* remember given doctype for reporting */
node = TY_(FindDocType)(doc);
sdef = tidyOptGetValue((TidyDoc)doc, TidyDoctype );
if (!sdef)
sdef = tidyOptGetCurrPick((TidyDoc) doc, TidyDoctypeMode );
if (sdef && (strcmp(sdef,"html5") == 0)) {
TY_(CheckHTML5)( doc, &doc->root );
}
if (node)
{
AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
@ -1798,6 +1904,14 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if ( xmlOut && xmlDecl )
TY_(FixXmlDecl)( doc );
/* At this point the apparent doctype is going to be as stable as
it can ever be, so we can start detecting things that shouldn't
be in this version of HTML
*/
if (doc->lexer->versionEmitted & VERS_HTML5)
TY_(CheckHTML5)( doc, &doc->root );
TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root );
#if !defined(NDEBUG) && defined(_MSC_VER)
SPRTF("All nodes AFTER clean and repair\n");
dbg_show_all_nodes( doc, &doc->root, 0 );

View file

@ -1,2 +1,3 @@
5.1.38
5.1.39
2016.02.16