Issue #166 - repeated main element.

This fix introduces two new services, FindNodeById and
FindNodeWithId. The former does a total tree search for a TidyTagId.

Maybe there is a way to optimise this search...

Also change the uint badForm from an on/off flag to a bit field, so it
could be extended to other document format errors.
This commit is contained in:
Geoff McLane 2015-02-24 14:58:43 +01:00
parent a5629443e6
commit cfffe7765f
3 changed files with 69 additions and 4 deletions

View file

@ -1603,7 +1603,9 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
break; break;
case DISCARDING_UNEXPECTED: case DISCARDING_UNEXPECTED:
/* Force error if in a bad form */ /* Force error if in a bad form, or
Issue #166 - repeated <main> element
*/
messageNode(doc, doc->badForm ? TidyError : TidyWarning, node, fmt, nodedesc); messageNode(doc, doc->badForm ? TidyError : TidyWarning, node, fmt, nodedesc);
break; break;
@ -1739,7 +1741,7 @@ void TY_(ErrorSummary)( TidyDocImpl* doc )
} }
} }
if (doc->badForm) if (doc->badForm & flg_BadForm) /* Issue #166 - changed to BIT flag to support other errors */
{ {
tidy_out(doc, "You may need to move one or both of the <form> and </form>\n"); tidy_out(doc, "You may need to move one or both of the <form> and </form>\n");
tidy_out(doc, "tags. HTML elements should be properly nested and form elements\n"); tidy_out(doc, "tags. HTML elements should be properly nested and form elements\n");
@ -1749,6 +1751,13 @@ void TY_(ErrorSummary)( TidyDocImpl* doc )
tidy_out(doc, "table! Note that one form can't be nested inside another!\n\n"); tidy_out(doc, "table! Note that one form can't be nested inside another!\n\n");
} }
if (doc->badForm & flg_BadMain) /* Issue #166 - repeated <main> element */
{
tidy_out(doc, "Only one <main> element is allowed in a document.\n");
tidy_out(doc, "Subsequent <main> elements have been discarded, which may\n");
tidy_out(doc, "render the document invalid.\n");
}
if (doc->badAccess) if (doc->badAccess)
{ {
/* Tidy "classic" accessibility tests */ /* Tidy "classic" accessibility tests */

View file

@ -373,10 +373,12 @@ Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node)
/* /*
errors in positioning of form start or end tags errors in positioning of form start or end tags
generally require human intervention to fix generally require human intervention to fix
Issue #166 - a repeated <main> element is recorded in the same
doc->badForm field (as a separate bit) to indicate discarded duplicates
*/ */
static void BadForm( TidyDocImpl* doc ) static void BadForm( TidyDocImpl* doc )
{ {
doc->badForm = yes; doc->badForm |= flg_BadForm;
/* doc->errors++; */ /* doc->errors++; */
} }
@ -3694,6 +3696,39 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
#endif #endif
} }
/*\
 *  Issue #166 - repeated <main> element
 *  This service is generalised, so it can check for any element id.
 *
 *  Search 'node', every sibling reachable through ->next, and all of
 *  their descendants reachable through ->content, for an element
 *  whose tag id is 'tid'.
 *
 *  node - first node to examine; may be NULL (nothing to search)
 *  tid  - the tag id to look for
 *
 *  Returns yes as soon as a matching node is found, else no.
 *
 *  Each node is examined exactly once. An earlier version followed the
 *  recursive call with a walk down the content->content chain; since
 *  the recursive call had already covered that whole subtree, every
 *  nesting level was searched again and the cost of a failed search
 *  grew exponentially with nesting depth.
\*/
Bool TY_(FindNodeWithId)( Node *node, TidyTagId tid )
{
    while (node)
    {
        if (TagIsId(node,tid))
            return yes;
        /* One recursive call covers the first child, all of its
           siblings, and everything beneath them. */
        if (node->content && TY_(FindNodeWithId)(node->content,tid))
            return yes;
        node = node->next;
    }
    return no;
}
/*\
 *  Issue #166 - repeated <main> element
 *  Whole-document search for an element with tag id 'tid'.
 *
 *  The search starts at the first child of the document root and is
 *  delegated to FindNodeWithId. A NULL 'doc' is tolerated: the search
 *  then starts from a NULL node.
 *
 *  Returns whatever FindNodeWithId returns for that starting node.
\*/
Bool TY_(FindNodeById)( TidyDocImpl* doc, TidyTagId tid )
{
    Node *start = NULL;

    if (doc)
        start = doc->root.content;

    return TY_(FindNodeWithId)(start,tid);
}
void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode) void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
{ {
Lexer* lexer = doc->lexer; Lexer* lexer = doc->lexer;
@ -3934,6 +3969,18 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
if (TY_(nodeIsElement)(node)) if (TY_(nodeIsElement)(node))
{ {
if (nodeIsMAIN(node)) {
/*\ Issue #166 - repeated <main> element
* How to efficiently search for a previous main element?
\*/
if ( TY_(FindNodeById)(doc, TidyTag_MAIN) )
{
doc->badForm |= flg_BadMain; /* this is an ERROR in format */
TY_(ReportError)(doc, body, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)( doc, node);
continue;
}
}
/* Issue #20 - merging from Ger Hobbelt fork put back CM_MIXED, which had been /* Issue #20 - merging from Ger Hobbelt fork put back CM_MIXED, which had been
removed to fix this issue - reverting to fix 880221e removed to fix this issue - reverting to fix 880221e
*/ */

View file

@ -23,6 +23,15 @@
#define MIN(a,b) (((a) < (b))?(a):(b)) #define MIN(a,b) (((a) < (b))?(a):(b))
#endif #endif
/*\
 *  Issue #166 - repeated <main> element
 *  doc->badForm used to be a plain on/off uint. It is now a set of
 *  BIT flags so that document format errors other than a misplaced
 *  <form> can be recorded in the same field. Each flag occupies its
 *  own bit, so they can be OR-ed together and tested independently;
 *  more flags can be added in the remaining bits.
\*/
#define flg_BadForm (1 << 0)    /* badly placed <form> / </form> tags */
#define flg_BadMain (1 << 1)    /* repeated <main> element was discarded */
struct _TidyDocImpl struct _TidyDocImpl
{ {
/* The Document Tree (and backing store buffer) */ /* The Document Tree (and backing store buffer) */
@ -63,7 +72,7 @@ struct _TidyDocImpl
uint badAccess; /* for accessibility errors */ uint badAccess; /* for accessibility errors */
uint badLayout; /* for bad style errors */ uint badLayout; /* for bad style errors */
uint badChars; /* for bad char encodings */ uint badChars; /* for bad char encodings */
uint badForm; /* for badly placed form tags */ uint badForm; /* bit field, for badly placed form tags, or other format errors */
/* Memory allocator */ /* Memory allocator */
TidyAllocator* allocator; TidyAllocator* allocator;