raft of changes to CheckHTML5, and clean

This commit is contained in:
Geoff McLane 2014-08-08 17:14:28 +02:00
parent 7a6a86503e
commit 786b6a99a9
4 changed files with 184 additions and 4 deletions

View file

@ -106,6 +106,8 @@ static struct _msgfmt
{ COERCE_TO_ENDTAG_WARN, "<%s> is probably intended as </%s>" }, /* Warning */
/* HTML5 */
{ REMOVED_HTML5, "%s element removed from HTML5" }, /* Warning */
{ BAD_BODY_HTML5, "Found attribute on body that is obsolete in HTML5. Use CSS" }, /* Warning */
{ BAD_ALIGN_HTML5, "The align attribute on the %s element is obsolete, Use CSS" }, /* Warning */
/* ReportNotice */
{ TRIM_EMPTY_ELEMENT, "trimming empty %s" }, /* Notice */
@ -1467,8 +1469,10 @@ void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc);
break;
case REMOVED_HTML5:
case NESTED_EMPHASIS:
case REMOVED_HTML5:
case BAD_BODY_HTML5:
case BAD_ALIGN_HTML5:
messageNode(doc, TidyWarning, rpt, fmt, nodedesc);
break;
case COERCE_TO_ENDTAG_WARN:
@ -1596,6 +1600,9 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
TagToString(element, elemdesc, sizeof(elemdesc));
messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc);
break;
case REMOVED_HTML5:
messageNode(doc, TidyError, rpt, fmt, nodedesc);
break;
}
}

View file

@ -155,8 +155,10 @@ void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
#define WHITE_IN_URI 87
#define REMOVED_HTML5 88 /* this element removed from HTML5 */
#define BAD_BODY_HTML5 89 /* attr on body removed from HTML5 */
#define BAD_ALIGN_HTML5 90 /* use of align attr removed from HTML5 */
#define PREVIOUS_LOCATION 89 /* last */
#define PREVIOUS_LOCATION 91 /* last */
/* character encoding errors */

View file

@ -230,6 +230,9 @@ uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */
/* HTML5 */
/* Each predicate below expands to TagIsId() with the node and one TidyTag_* id. */
#define nodeIsDATALIST( node ) TagIsId( node, TidyTag_DATALIST )
/* NOT in HTML 5 */
/* Predicates for the tags that CheckHTML5 tests when reporting removed elements. */
#define nodeIsACRONYM( node ) TagIsId( node, TidyTag_ACRONYM )
/* NOTE(review): spelled 'nodesIs...' unlike the sibling 'nodeIs...' macros;
 * CheckHTML5 invokes it under this exact spelling, so a rename must update
 * that call site at the same time. */
#define nodesIsFRAME( node ) TagIsId( node, TidyTag_FRAME )
#define nodeIsTT( node ) TagIsId( node, TidyTag_TT )
#endif /* __TAGS_H__ */

View file

@ -1228,6 +1228,7 @@ int tidyDocRunDiagnostics( TidyDocImpl* doc )
return tidyDocStatus( doc );
}
/* What about <blink>, <s> strike-through, <u> underline */
static struct _html5Info
{
const char *tag;
@ -1259,11 +1260,177 @@ Bool inRemovedInfo( uint tid )
return no;
}
/*\
 * Tell whether 'node' carries at least one of the attributes looked up
 * below (TidyAttr_BACKGROUND, TidyAttr_BGCOLOR, TidyAttr_TEXT,
 * TidyAttr_LINK, TidyAttr_VLINK, TidyAttr_ALINK).
 * The lookups run in that order and stop at the first hit.
 * Returns yes when any lookup succeeds, otherwise no.
\*/
static Bool BadBody5( Node* node )
{
    if ( TY_(AttrGetById)(node, TidyAttr_BACKGROUND) )
        return yes;

    if ( TY_(AttrGetById)(node, TidyAttr_BGCOLOR) )
        return yes;

    if ( TY_(AttrGetById)(node, TidyAttr_TEXT) )
        return yes;

    if ( TY_(AttrGetById)(node, TidyAttr_LINK) )
        return yes;

    if ( TY_(AttrGetById)(node, TidyAttr_VLINK) )
        return yes;

    if ( TY_(AttrGetById)(node, TidyAttr_ALINK) )
        return yes;

    /* none of the tested attributes is present */
    return no;
}
/*\
 * Walk the attribute list of 'node' and report whether any entry
 * satisfies attrIsALIGN().
 * Returns yes on the first matching attribute, no when the list is
 * exhausted (including when it is empty).
\*/
static Bool nodeHasAlignAttr( Node *node )
{
    AttVal* attr = node->attributes;

    while ( attr != NULL )
    {
        /* attrIsALIGN(av) is expected to be AttrIsId( av, TidyAttr_ALIGN ) */
        if ( attrIsALIGN(attr) )
            return yes;

        attr = attr->next;
    }

    return no;
}
/* see http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete */
void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
{
/* Lexer* lexer = doc->lexer; */
Bool clean = cfgBool( doc, TidyMakeClean );
Node* body = TY_(FindBody)( doc );
Bool warn = yes; /* should this be a warning, error, or report??? */
while (node)
{
if ( nodeHasAlignAttr( node ) ) {
/*\
* Is this for ALL elements that accept an 'align' attribute, or should
* this be a sub-set test
\*/
TY_(ReportWarning)(doc, node, node, BAD_ALIGN_HTML5);
}
if ( node == body ) {
if ( BadBody5(body) ) {
/* perhaps need a new/different warning for this, like
* The 'background' attribute on the body element is obsolete. Use CSS instead.
* but how can an attribute name be passed so it is embedded in the message?
\*/
TY_(ReportWarning)(doc, node, body, BAD_BODY_HTML5);
}
} else
if ( nodeIsACRONYM(node) ) {
if (clean) {
/* replace with 'abbr' with warning to that effect
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*/
TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no);
} else {
/* sadly, this stops writing of the tidied document, unless 'forced'
TY_(ReportError)(doc, node, node, REMOVED_HTML5);
so go back to a 'warning' for now...
*/
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if ( nodeIsAPPLET(node) ) {
if (clean) {
/* replace with 'object' with warning to that effect
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*/
TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no);
} else {
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if ( nodeIsBASEFONT(node) ) {
/*\
* basefont: CSS equivalent 'font-size', 'font-family' and 'color' on body or class on each subsequent element
* Difficult - If it is the first body element, then could consider adding that
* to the <body> as a whole, else could perhaps apply it to all subsequent element.
* But also in consideration is the fact that it was NOT supported in many browsers
* For now just report a warning
\*/
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if ( nodeIsBIG(node) ) {
/*\
* big: CSS equivalent 'font-size:larger'
* so could replace the <big> ... </big> with
* <span style="font-size: larger"> ... </span>
* then replace <big> with <span>
* Need to think about that...
* Could use -
* TY_(AddStyleProperty)( doc, node, "font-size: larger" );
* TY_(CoerceNode)(doc, node, TidyTag_SPAN, no, no);
* Alternatively generated a <style> but how to get the style name
* TY_(AddAttribute)( doc, node, "class", "????" );
* Also maybe need a specific message like
* Element '%s' replaced with 'span' with a 'font-size: larger style attribute
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*
\*/
if (clean) {
TY_(AddStyleProperty)( doc, node, "font-size: larger" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else {
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if ( nodeIsCENTER(node) ) {
/*\
* center: CSS equivalent 'text-align:center'
* and 'margin-left:auto; margin-right:auto' on descendant blocks
* Tidy already handles this if 'clean' by SILENTLY generating the <style>
* and adding a <div class="c1"> around the elements.
* see: static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
\*/
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if ( nodeIsDIR(node) ) {
/*\
* dir: replace by <ul>
* Tidy already actions this and issues a warning
* Should this be CHANGED???
\*/
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if ( nodeIsFONT(node) ) {
/*\
* Tidy already handles this -
* If 'clean' replaced by CSS, else
* if is NOT clean, and doctype html5 then warnings issued
* done in Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) (I think?)
*
\*/
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if (( nodesIsFRAME(node) ) || ( nodeIsFRAMESET(node) ) || ( nodeIsNOFRAMES(node) )) {
/*\
* YOW: What to do here?????? Maybe <iframe>????
\*/
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
} else
if ( nodeIsSTRIKE(node) ) {
/*\
* strike: CSS equivalent 'text-decoration:line-through'
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
\*/
if (clean) {
TY_(AddStyleProperty)( doc, node, "text-decoration: line-through" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else {
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if ( nodeIsTT(node) ) {
/*\
* tt: CSS equivalent 'font-family:monospace'
* Tidy presently does nothing. Tidy5 issues a warning
* But like the 'clean' <font> replacement this could also be replaced with CSS
* maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
*
\*/
if (clean) {
TY_(AddStyleProperty)( doc, node, "font-family: monospace" );
TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
} else {
TY_(ReportWarning)(doc, node, node, REMOVED_HTML5);
}
} else
if (TY_(nodeIsElement)(node)) {
if (node->tag) {
if ((!(node->tag->versions & VERS_HTML5))||(inRemovedInfo(node->tag->id))) {
@ -1272,6 +1439,7 @@ void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
}
}
}
if (node->content)
TY_(CheckHTML5)( doc, node->content );