Issue #166 - repeated main element.

This fix introduces two new services, FindNodeById and
FindNodeWithId. The former does a total tree search for a TidyTagId.

Maybe there is a way to optimise this search...

Also change the uint badForm from an on/off flag to a bit field, so it
can be extended to other document format errors.
This commit is contained in:
Geoff McLane 2015-02-24 14:58:43 +01:00
parent a5629443e6
commit cfffe7765f
3 changed files with 69 additions and 4 deletions

View file

@ -1603,7 +1603,9 @@ void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
break;
case DISCARDING_UNEXPECTED:
/* Force error if in a bad form */
/* Force error if in a bad form, or
Issue #166 - repeated <main> element
*/
messageNode(doc, doc->badForm ? TidyError : TidyWarning, node, fmt, nodedesc);
break;
@ -1739,7 +1741,7 @@ void TY_(ErrorSummary)( TidyDocImpl* doc )
}
}
if (doc->badForm)
if (doc->badForm & flg_BadForm) /* Issue #166 - changed to BIT flag to support other errors */
{
tidy_out(doc, "You may need to move one or both of the <form> and </form>\n");
tidy_out(doc, "tags. HTML elements should be properly nested and form elements\n");
@ -1749,6 +1751,13 @@ void TY_(ErrorSummary)( TidyDocImpl* doc )
tidy_out(doc, "table! Note that one form can't be nested inside another!\n\n");
}
if (doc->badForm & flg_BadMain) /* Issue #166 - repeated <main> element */
{
tidy_out(doc, "Only one <main> element is allowed in a document.\n");
tidy_out(doc, "Subsequent <main> elements have been discarded, which may\n");
tidy_out(doc, "render the document invalid.\n");
}
if (doc->badAccess)
{
/* Tidy "classic" accessibility tests */

View file

@ -373,10 +373,12 @@ Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node)
/*
  errors in positioning of form start or end tags
  generally require human intervention to fix

  Issue #166 - doc->badForm is a bit field: flg_BadForm records a
  badly placed form tag, and the repeated <main> element check uses
  the same field to record discarded duplicates. Only OR in the
  form bit here, so any other bit that is already set survives.
*/
static void BadForm( TidyDocImpl* doc )
{
    doc->badForm |= flg_BadForm;
    /* doc->errors++; */
}
@ -3694,6 +3696,39 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
#endif
}
/*\
 * Issue #166 - repeated <main> element
 * But this service is generalised to check for other duplicate elements.
 *
 * Reports whether TagIsId(n, tid) holds for 'node', for any of its
 * following siblings, or for any node in their subtrees.
 *
 *   node - first node of the sibling list to search; may be NULL
 *   tid  - tag id to look for
 *
 * Returns yes as soon as a match is found, otherwise no.
 *
 * Each node is tested only once: the recursive call on node->content
 * already walks every child and all of their descendants. Additionally
 * stepping down the content->content chain and recursing at each level
 * would re-search the same subtrees again and again, which gets
 * extremely slow on deeply nested documents.
\*/
Bool TY_(FindNodeWithId)( Node *node, TidyTagId tid )
{
    while (node)
    {
        if (TagIsId(node,tid))
            return yes;
        if (node->content && TY_(FindNodeWithId)(node->content,tid))
            return yes;
        node = node->next;
    }
    return no;
}
/*\
 * Issue #166 - repeated <main> element
 * Global search: look for an element with tag id 'tid', starting
 * from the first node under the document root. With a NULL doc the
 * search is started from a NULL node.
\*/
Bool TY_(FindNodeById)( TidyDocImpl* doc, TidyTagId tid )
{
    Node *first = NULL;

    if (doc)
        first = doc->root.content;
    return TY_(FindNodeWithId)(first,tid);
}
void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
{
Lexer* lexer = doc->lexer;
@ -3934,6 +3969,18 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
if (TY_(nodeIsElement)(node))
{
if (nodeIsMAIN(node)) {
/*\ Issue #166 - repeated <main> element
* How to efficiently search for a previous main element?
\*/
if ( TY_(FindNodeById)(doc, TidyTag_MAIN) )
{
doc->badForm |= flg_BadMain; /* this is an ERROR in format */
TY_(ReportError)(doc, body, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)( doc, node);
continue;
}
}
/* Issue #20 - merging from Ger Hobbelt fork put back CM_MIXED, which had been
removed to fix this issue - reverting to fix 880221e
*/

View file

@ -23,6 +23,15 @@
#define MIN(a,b) (((a) < (b))?(a):(b))
#endif
/*\
 * Issue #166 - repeated <main> element
 * Bit values for the badForm member of the document structure,
 * which used to be a plain on/off uint. Using one bit per kind of
 * error lets it record more than <form> errors, and further bits
 * can be added later.
\*/
#define flg_BadForm (1 << 0)  /* badly placed <form> tags */
#define flg_BadMain (1 << 1)  /* repeated <main> element */
struct _TidyDocImpl
{
/* The Document Tree (and backing store buffer) */
@ -63,7 +72,7 @@ struct _TidyDocImpl
uint badAccess; /* for accessibility errors */
uint badLayout; /* for bad style errors */
uint badChars; /* for bad char encodings */
uint badForm; /* for badly placed form tags */
uint badForm; /* bit field, for badly placed form tags, or other format errors */
/* Memory allocator */
TidyAllocator* allocator;