diff --git a/regression_testing/cases/dev-cases/case-001.conf b/regression_testing/cases/dev-cases/case-001.conf new file mode 100755 index 0000000..85723a4 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-001.conf @@ -0,0 +1,4 @@ +# Config for test case. +tidy-mark: no +indent: yes +wrap: 999 diff --git a/regression_testing/cases/dev-cases/case-001@0.html b/regression_testing/cases/dev-cases/case-001@0.html new file mode 100755 index 0000000..dd9ab09 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-001@0.html @@ -0,0 +1,26 @@ + + + + + This is a title + + + +
+

This is the first paragraph.

+

Now now, second paragraph?

+
+

I'm nested in a div.

+ +

Because, you know, lists should have a minimum of three items.

+
+

Penultimate paragraphs are sometimes the best.

+
+

Don't Cray; Buy Amiga!

+ + diff --git a/regression_testing/cases/dev-cases/case-002.conf b/regression_testing/cases/dev-cases/case-002.conf new file mode 100755 index 0000000..85723a4 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-002.conf @@ -0,0 +1,4 @@ +# Config for test case. +tidy-mark: no +indent: yes +wrap: 999 diff --git a/regression_testing/cases/dev-cases/case-002@1.html b/regression_testing/cases/dev-cases/case-002@1.html new file mode 100755 index 0000000..180f995 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-002@1.html @@ -0,0 +1,33 @@ + + + + + This is a title + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/regression_testing/cases/dev-expects/case-001.html b/regression_testing/cases/dev-expects/case-001.html new file mode 100644 index 0000000..472b95b --- /dev/null +++ b/regression_testing/cases/dev-expects/case-001.html @@ -0,0 +1,41 @@ + + + + + + This is a title + + + +
+

+ This is the first paragraph. +

+

+ Now now, second paragraph? +

+
+

+ I'm nested in a div. +

+
    +
  • List item one. +
  • +
  • List item two. There isn't a third. Hahaha. +
  • +
+

+ Because, you know, lists should have a minimum of three items. +

+
+

+ Penultimate paragraphs are sometimes the best. +

+
+

+ Don't Cray; Buy Amiga! +

+ + diff --git a/regression_testing/cases/dev-expects/case-001.txt b/regression_testing/cases/dev-expects/case-001.txt new file mode 100644 index 0000000..c32fbc8 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-001.txt @@ -0,0 +1,14 @@ +line 17 column 13 - Info: missing optional end tag +Info: Document content looks like HTML5 +No warnings or errors were found. + +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: https://html.spec.whatwg.org/multipage/ +Validate your HTML documents: https://validator.w3.org/nu/ +Lobby your company to join the W3C: https://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/dev-expects/case-002.html b/regression_testing/cases/dev-expects/case-002.html new file mode 100644 index 0000000..2e1f028 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-002.html @@ -0,0 +1,39 @@ + + + + + + This is a title + + + + + + + + + + + + + + + + + + + diff --git a/regression_testing/cases/dev-expects/case-002.txt b/regression_testing/cases/dev-expects/case-002.txt new file mode 100644 index 0000000..e79f1ec --- /dev/null +++ b/regression_testing/cases/dev-expects/case-002.txt @@ -0,0 +1,16 @@ +line 32 column 1 - Warning: discarding unexpected +line 33 column 1 - Warning: discarding unexpected +line 25 column 5 - Warning: missing
+Info: Document content looks like HTML5 +Tidy found 3 warnings and 0 errors! + +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: https://html.spec.whatwg.org/multipage/ +Validate your HTML documents: https://validator.w3.org/nu/ +Lobby your company to join the W3C: https://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/dev-expects/case-003.html b/regression_testing/cases/dev-expects/case-003.html new file mode 100644 index 0000000..af8bcb5 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-003.html @@ -0,0 +1,30 @@ + + + + + + case-003 + + + +
+
+
+ + + + +
+ What is up? +
+
+
+
+
+
+ Hello +
+ + diff --git a/regression_testing/cases/dev-expects/case-003.txt b/regression_testing/cases/dev-expects/case-003.txt new file mode 100644 index 0000000..f0a93ab --- /dev/null +++ b/regression_testing/cases/dev-expects/case-003.txt @@ -0,0 +1,26 @@ +line 14 column 7 - Warning:
isn't allowed in elements +line 13 column 5 - Info: previously mentioned +line 14 column 7 - Warning: missing
before +line 10 column 3 - Info: missing optional end tag +line 12 column 5 - Warning: The summary attribute on the element is obsolete in HTML5 +line 14 column 7 - Warning: trimming empty
+line 21 column 3 - Warning:
element removed from HTML5 +line 12 column 5 - Warning:
attribute "summary" not allowed for HTML5 +Info: Document content looks like HTML5 +Tidy found 6 warnings and 0 errors! + +One or more empty elements were present in the source document but +dropped on output. If these elements are necessary or you don't want +this behavior, then consider setting the option "drop-empty-elements" +to no. + +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: https://html.spec.whatwg.org/multipage/ +Validate your HTML documents: https://validator.w3.org/nu/ +Lobby your company to join the W3C: https://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/dev-expects/case-004.html b/regression_testing/cases/dev-expects/case-004.html new file mode 100644 index 0000000..34d97df --- /dev/null +++ b/regression_testing/cases/dev-expects/case-004.html @@ -0,0 +1,61 @@ + + + + + + case-004 + + + + + + + + + + + + + + + + + + diff --git a/regression_testing/cases/dev-expects/case-004.txt b/regression_testing/cases/dev-expects/case-004.txt new file mode 100644 index 0000000..968d986 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-004.txt @@ -0,0 +1,14 @@ +line 30 column 5 - Warning: can't be nested +Info: Document content looks like HTML5 +Tidy found 1 warning and 0 errors! + +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: https://html.spec.whatwg.org/multipage/ +Validate your HTML documents: https://validator.w3.org/nu/ +Lobby your company to join the W3C: https://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/special-cases/README.txt b/regression_testing/cases/special-cases/README.txt new file mode 100644 index 0000000..bc7ceb9 --- /dev/null +++ b/regression_testing/cases/special-cases/README.txt @@ -0,0 +1,15 @@ +About this test suite: +====================== + +These test files represent that standard regression testing that must be +performed prior to committing changes to Tidy's code. In some circumstances +results are platform specific and these notices will be displayed in the +testing results. + +This test set is intended to collect tests that we **don't** want to run +automatically, either because they take a long time to complete, or their +expectations files are much too large for keeping the repository a reasonable +size. + +Because there's no matching -expects directory, these cases will NOT be included +in the default testing service. diff --git a/regression_testing/cases/special-cases/case-evil.conf b/regression_testing/cases/special-cases/case-evil.conf new file mode 100755 index 0000000..85723a4 --- /dev/null +++ b/regression_testing/cases/special-cases/case-evil.conf @@ -0,0 +1,4 @@ +# Config for test case. +tidy-mark: no +indent: yes +wrap: 999 diff --git a/regression_testing/cases/special-cases/case-evil@1.html b/regression_testing/cases/special-cases/case-evil@1.html new file mode 100644 index 0000000..c7540b8 --- /dev/null +++ b/regression_testing/cases/special-cases/case-evil@1.html @@ -0,0 +1,6 @@ + + + diff --git a/src/parser.c b/src/parser.c index 69befb9..0662779 100644 --- a/src/parser.c +++ b/src/parser.c @@ -15,14 +15,6 @@ #include "sprtf.h" -/****************************************************************************//* - ** MARK: - Forward Declarations - ***************************************************************************/ - - -static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ); - - /****************************************************************************//* ** MARK: - Configuration Options ***************************************************************************/ @@ -42,33 +34,43 @@ static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ); /** - * Insert "node" into markup tree in place of "element" - * which is moved to become the child of the node + * Generalised search for duplicate elements. + * Issue #166 - repeated
element. */ -static void InsertNodeAsParent(Node *element, Node *node) +static Bool findNodeWithId( Node *node, TidyTagId tid ) { - node->content = element; - node->last = element; - node->parent = element->parent; - element->parent = node; + Node *content; + while (node) + { + if (TagIsId(node,tid)) + return yes; + /*\ + * Issue #459 - Under certain circumstances, with many node this use of + * 'for (content = node->content; content; content = content->content)' + * would produce a **forever** circle, or at least a very extended loop... + * It is sufficient to test the content, if it exists, + * to quickly iterate all nodes. Now all nodes are tested only once. + \*/ + content = node->content; + if (content) + { + if ( findNodeWithId(content,tid) ) + return yes; + } + node = node->next; + } + return no; +} - if (node->parent->content == element) - node->parent->content = node; - if (node->parent->last == element) - node->parent->last = node; - - node->prev = element->prev; - element->prev = NULL; - - if (node->prev) - node->prev->next = node; - - node->next = element->next; - element->next = NULL; - - if (node->next) - node->next->prev = node; +/** + * Perform a global search for an element. + * Issue #166 - repeated
element + */ +static Bool findNodeById( TidyDocImpl* doc, TidyTagId tid ) +{ + Node *node = (doc ? doc->root.content : NULL); + return findNodeWithId( node,tid ); } @@ -120,46 +122,33 @@ static Bool InsertMisc(Node *element, Node *node) /** - * Move node to the head, where element is used as starting - * point in hunt for head. normally called during parsing. + * Insert "node" into markup tree in place of "element" + * which is moved to become the child of the node */ -static void MoveToHead( TidyDocImpl* doc, Node *element, Node *node ) +static void InsertNodeAsParent(Node *element, Node *node) { - Node *head; + node->content = element; + node->last = element; + node->parent = element->parent; + element->parent = node; - TY_(RemoveNode)( node ); /* make sure that node is isolated */ + if (node->parent->content == element) + node->parent->content = node; - if ( TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN ); + if (node->parent->last == element) + node->parent->last = node; - head = TY_(FindHEAD)(doc); - assert(head != NULL); + node->prev = element->prev; + element->prev = NULL; - TY_(InsertNodeAtEnd)(head, node); + if (node->prev) + node->prev->next = node; - if ( node->tag->parser ) - ParseTag( doc, node, IgnoreWhitespace ); - } - else - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - } -} + node->next = element->next; + element->next = NULL; - -/** - * Moves given node to end of body element. - */ -static void MoveNodeToBody( TidyDocImpl* doc, Node* node ) -{ - Node* body = TY_(FindBody)( doc ); - if ( body ) - { - TY_(RemoveNode)( node ); - TY_(InsertNodeAtEnd)( body, node ); - } + if (node->next) + node->next->prev = node; } @@ -188,43 +177,55 @@ static void MoveBeforeTable( TidyDocImpl* ARG_UNUSED(doc), Node *row, /** - * Generalised search for duplicate elements. - * Issue #166 - repeated
element. + * Moves given node to end of body element. */ -static Bool findNodeWithId( Node *node, TidyTagId tid ) +static void MoveNodeToBody( TidyDocImpl* doc, Node* node ) { - Node *content; - while (node) + Node* body = TY_(FindBody)( doc ); + if ( body ) { - if (TagIsId(node,tid)) - return yes; - /*\ - * Issue #459 - Under certain circumstances, with many node this use of - * 'for (content = node->content; content; content = content->content)' - * would produce a **forever** circle, or at least a very extended loop... - * It is sufficient to test the content, if it exists, - * to quickly iterate all nodes. Now all nodes are tested only once. - \*/ - content = node->content; - if (content) - { - if ( findNodeWithId(content,tid) ) - return yes; - } - node = node->next; + TY_(RemoveNode)( node ); + TY_(InsertNodeAtEnd)( body, node ); } - return no; } /** - * Perform a global search for an element. - * Issue #166 - repeated
element + * Move node to the head, where element is used as starting + * point in hunt for head. Normally called during parsing. */ -static Bool findNodeById( TidyDocImpl* doc, TidyTagId tid ) +static void MoveToHead( TidyDocImpl* doc, Node *element, Node *node ) { - Node *node = (doc ? doc->root.content : NULL); - return findNodeWithId( node,tid ); + Node *head = NULL; + + TY_(RemoveNode)( node ); /* make sure that node is isolated */ + + if ( TY_(nodeIsElement)(node) ) + { + TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN ); + + head = TY_(FindHEAD)(doc); + assert(head != NULL); + + TY_(InsertNodeAtEnd)(head, node); + + if ( node->tag->parser ) + { + /* Only one of the existing test cases as of 2021-08-14 invoke + MoveToHead, and it doesn't go deeper than one level. The + parser() call is supposed to return a node if additional + parsing is needed. Keep this in mind if we start to get bug + reports. + */ + Parser* parser = node->tag->parser; + parser( doc, node, IgnoreWhitespace ); + } + } + else + { + TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); + TY_(FreeNode)( doc, node ); + } } @@ -359,6 +360,73 @@ static Bool IsPreDescendant(Node* node) } +/** + * Indicates whether or not the only content model for the given node + * is CM_INLINE. + */ +static Bool nodeCMIsOnlyInline( Node* node ) +{ + return TY_(nodeHasCM)( node, CM_INLINE ) && !TY_(nodeHasCM)( node, CM_BLOCK ); +} + + +/** + * Indicates whether or not the content of the given node is acceptable + * content for pre elements + */ +static Bool PreContent( TidyDocImpl* ARG_UNUSED(doc), Node* node ) +{ + /* p is coerced to br's, Text OK too */ + if ( nodeIsP(node) || TY_(nodeIsText)(node) ) + return yes; + + if ( node->tag == NULL || + nodeIsPARAM(node) || + !TY_(nodeHasCM)(node, CM_INLINE|CM_NEW) ) + return no; + + return yes; +} + + +/** + * Indicates whether or not leading whitespace should be cleaned. + */ +static Bool CleanLeadingWhitespace(TidyDocImpl* ARG_UNUSED(doc), Node* node) +{ + if (!TY_(nodeIsText)(node)) + return no; + + if (node->parent->type == DocTypeTag) + return no; + + if (IsPreDescendant(node)) + return no; + + if (node->parent->tag && node->parent->tag->parser == TY_(ParseScript)) + return no; + + /*

...
......

*/ + if (nodeIsBR(node->prev)) + return yes; + + /*

...

*/ + if (node->prev == NULL && !TY_(nodeHasCM)(node->parent, CM_INLINE)) + return yes; + + /*

...

... */ + if (node->prev && !TY_(nodeHasCM)(node->prev, CM_INLINE) && + TY_(nodeIsElement)(node->prev)) + return yes; + + /*

...

*/ + if (!node->prev && !node->parent->prev && !TY_(nodeHasCM)(node->parent->parent, CM_INLINE)) + return yes; + + return no; +} + + /** * Indicates whether or not trailing whitespace should be cleaned. */ @@ -414,73 +482,6 @@ static Bool CleanTrailingWhitespace(TidyDocImpl* doc, Node* node) } -/** - * Indicates whether or not leading whitespace should be cleaned. - */ -static Bool CleanLeadingWhitespace(TidyDocImpl* ARG_UNUSED(doc), Node* node) -{ - if (!TY_(nodeIsText)(node)) - return no; - - if (node->parent->type == DocTypeTag) - return no; - - if (IsPreDescendant(node)) - return no; - - if (node->parent->tag && node->parent->tag->parser == TY_(ParseScript)) - return no; - - /*

...
......

*/ - if (nodeIsBR(node->prev)) - return yes; - - /*

...

*/ - if (node->prev == NULL && !TY_(nodeHasCM)(node->parent, CM_INLINE)) - return yes; - - /*

...

... */ - if (node->prev && !TY_(nodeHasCM)(node->prev, CM_INLINE) && - TY_(nodeIsElement)(node->prev)) - return yes; - - /*

...

*/ - if (!node->prev && !node->parent->prev && !TY_(nodeHasCM)(node->parent->parent, CM_INLINE)) - return yes; - - return no; -} - - -/** - * Indicates whether or not the content of the given node is acceptable - * content for pre elements - */ -static Bool PreContent( TidyDocImpl* ARG_UNUSED(doc), Node* node ) -{ - /* p is coerced to br's, Text OK too */ - if ( nodeIsP(node) || TY_(nodeIsText)(node) ) - return yes; - - if ( node->tag == NULL || - nodeIsPARAM(node) || - !TY_(nodeHasCM)(node, CM_INLINE|CM_NEW) ) - return no; - - return yes; -} - - -/** - * Indicates whether or not the only content model for the given node - * is CM_INLINE. - */ -static Bool nodeCMIsOnlyInline( Node* node ) -{ - return TY_(nodeHasCM)( node, CM_INLINE ) && !TY_(nodeHasCM)( node, CM_BLOCK ); -} - - /***************************************************************************//* ** MARK: - Information Accumulation ***************************************************************************/ @@ -504,84 +505,20 @@ static void BadForm( TidyDocImpl* doc ) /** - * This maps - * hello world - * to - * hello world - * - * If last child of element is a text node - * then trim trailing white space character - * moving it to after element's end tag. + * Adds style information as a class in the document or a property + * of the node to prevent indentation of inferred UL tags. */ -static void TrimTrailingSpace( TidyDocImpl* doc, Node *element, Node *last ) +static void AddClassNoIndent( TidyDocImpl* doc, Node *node ) { - Lexer* lexer = doc->lexer; - byte c; - - if (TY_(nodeIsText)(last)) - { - if (last->end > last->start) - { - c = (byte) lexer->lexbuf[ last->end - 1 ]; - - if ( c == ' ' ) - { - last->end -= 1; - if ( (element->tag->model & CM_INLINE) && - !(element->tag->model & CM_FIELD) ) - lexer->insertspace = yes; - } - } - } -} - - -/** - * This maps - *

hello world - * to - *

hello world - * - * Trims initial space, by moving it before the - * start tag, or if this element is the first in - * parent's content, then by discarding the space - */ -static void TrimInitialSpace( TidyDocImpl* doc, Node *element, Node *text ) -{ - Lexer* lexer = doc->lexer; - Node *prev, *node; - - if ( TY_(nodeIsText)(text) && - lexer->lexbuf[text->start] == ' ' && - text->start < text->end ) - { - if ( (element->tag->model & CM_INLINE) && - !(element->tag->model & CM_FIELD) ) - { - prev = element->prev; - - if (TY_(nodeIsText)(prev)) - { - if (prev->end == 0 || lexer->lexbuf[prev->end - 1] != ' ') - lexer->lexbuf[(prev->end)++] = ' '; - - ++(element->start); - } - else /* create new node */ - { - node = TY_(NewNode)(lexer->allocator, lexer); - node->start = (element->start)++; - node->end = element->start; - lexer->lexbuf[node->start] = ' '; - TY_(InsertNodeBeforeElement)(element ,node); - DEBUG_LOG(SPRTF("TrimInitialSpace: Created text node, inserted before <%s>\n", - (element->element ? element->element : "unknown"))); - } - } - - /* discard the space in current node */ - ++(text->start); - } + ctmbstr sprop = + "padding-left: 2ex; margin-left: 0ex" + "; margin-top: 0ex; margin-bottom: 0ex"; + if ( !cfgBool(doc, TidyDecorateInferredUL) ) + return; + if ( cfgBool(doc, TidyMakeClean) ) + TY_(AddStyleAsClass)( doc, node, sprop ); + else + TY_(AddStyleProperty)( doc, node, sprop ); } @@ -628,30 +565,20 @@ static void CleanSpaces(TidyDocImpl* doc, Node* node) /** - * Move initial and trailing space out. - * This routine maps: - * hello world - * to - * hello world - * and - * hello world - * to - * hello world + * If a table row is empty then insert an empty cell. This practice is + * consistent with browser behavior and avoids potential problems with + * row spanning cells. */ -static void TrimSpaces( TidyDocImpl* doc, Node *element) +static void FixEmptyRow(TidyDocImpl* doc, Node *row) { - Node* text = element->content; + Node *cell; - if (nodeIsPRE(element) || IsPreDescendant(element)) - return; - - if (TY_(nodeIsText)(text)) - TrimInitialSpace(doc, element, text); - - text = element->last; - - if (TY_(nodeIsText)(text)) - TrimTrailingSpace(doc, element, text); + if (row->content == NULL) + { + cell = TY_(InferredTag)(doc, TidyTag_TD); + TY_(InsertNodeAtEnd)(row, cell); + TY_(Report)(doc, row, cell, MISSING_STARTTAG); + } } @@ -677,43 +604,116 @@ static void InsertDocType( TidyDocImpl* doc, Node *element, Node *doctype ) } - /** - * Adds style information as a class in the document or a property - * of the node to prevent indentation of inferred UL tags. + * This maps + *

hello world + * to + *

hello world + * + * Trims initial space, by moving it before the + * start tag, or if this element is the first in + * parent's content, then by discarding the space */ -static void AddClassNoIndent( TidyDocImpl* doc, Node *node ) +static void TrimInitialSpace( TidyDocImpl* doc, Node *element, Node *text ) { - ctmbstr sprop = - "padding-left: 2ex; margin-left: 0ex" - "; margin-top: 0ex; margin-bottom: 0ex"; - if ( !cfgBool(doc, TidyDecorateInferredUL) ) - return; - if ( cfgBool(doc, TidyMakeClean) ) - TY_(AddStyleAsClass)( doc, node, sprop ); - else - TY_(AddStyleProperty)( doc, node, sprop ); + Lexer* lexer = doc->lexer; + Node *prev, *node; + + if ( TY_(nodeIsText)(text) && + lexer->lexbuf[text->start] == ' ' && + text->start < text->end ) + { + if ( (element->tag->model & CM_INLINE) && + !(element->tag->model & CM_FIELD) ) + { + prev = element->prev; + + if (TY_(nodeIsText)(prev)) + { + if (prev->end == 0 || lexer->lexbuf[prev->end - 1] != ' ') + lexer->lexbuf[(prev->end)++] = ' '; + + ++(element->start); + } + else /* create new node */ + { + node = TY_(NewNode)(lexer->allocator, lexer); + node->start = (element->start)++; + node->end = element->start; + lexer->lexbuf[node->start] = ' '; + TY_(InsertNodeBeforeElement)(element ,node); + DEBUG_LOG(SPRTF("TrimInitialSpace: Created text node, inserted before <%s>\n", + (element->element ? element->element : "unknown"))); + } + } + + /* discard the space in current node */ + ++(text->start); + } } /** - * If a table row is empty then insert an empty cell. This practice is - * consistent with browser behavior and avoids potential problems with - * row spanning cells. + * This maps + * hello world + * to + * hello world + * + * If last child of element is a text node + * then trim trailing white space character + * moving it to after element's end tag. */ -static void FixEmptyRow(TidyDocImpl* doc, Node *row) +static void TrimTrailingSpace( TidyDocImpl* doc, Node *element, Node *last ) { - Node *cell; + Lexer* lexer = doc->lexer; + byte c; - if (row->content == NULL) + if (TY_(nodeIsText)(last)) { - cell = TY_(InferredTag)(doc, TidyTag_TD); - TY_(InsertNodeAtEnd)(row, cell); - TY_(Report)(doc, row, cell, MISSING_STARTTAG); + if (last->end > last->start) + { + c = (byte) lexer->lexbuf[ last->end - 1 ]; + + if ( c == ' ' ) + { + last->end -= 1; + if ( (element->tag->model & CM_INLINE) && + !(element->tag->model & CM_FIELD) ) + lexer->insertspace = yes; + } + } } } +/** + * Move initial and trailing space out. + * This routine maps: + * hello world + * to + * hello world + * and + * hello world + * to + * hello world + */ +static void TrimSpaces( TidyDocImpl* doc, Node *element) +{ + Node* text = element->content; + + if (nodeIsPRE(element) || IsPreDescendant(element)) + return; + + if (TY_(nodeIsText)(text)) + TrimInitialSpace(doc, element, text); + + text = element->last; + + if (TY_(nodeIsText)(text)) + TrimTrailingSpace(doc, element, text); +} + + /***************************************************************************//* ** MARK: - Parsers Support ***************************************************************************/ @@ -817,13 +817,12 @@ static Bool FindLastLI( Node *list, Node **lastli ) */ void TY_(InitParserStack)( TidyDocImpl* doc ) { - uint default_size = 16; + enum { default_size = 32 }; TidyParserMemory *content = (TidyParserMemory *) TidyAlloc( doc->allocator, sizeof(TidyParserMemory) * default_size ); doc->stack.content = content; doc->stack.size = default_size; doc->stack.top = -1; - doc->stack.allocator = doc->allocator; } @@ -832,7 +831,7 @@ void TY_(InitParserStack)( TidyDocImpl* doc ) */ void TY_(FreeParserStack)( TidyDocImpl* doc ) { - TidyFree( doc->stack.allocator, doc->stack.content ); + TidyFree( doc->allocator, doc->stack.content ); doc->stack.content = NULL; doc->stack.size = 0; @@ -842,15 +841,14 @@ void TY_(FreeParserStack)( TidyDocImpl* doc ) /** * Increase the stack size. - * TODO: don't overflow max_uint. Need a message when we can no longer increase the size beyond 429 million depth. */ static void growParserStack( TidyDocImpl* doc ) { TidyParserMemory *content; - content = (TidyParserMemory *) TidyAlloc( doc->stack.allocator, sizeof(TidyParserMemory) * doc->stack.size * 2 ); + content = (TidyParserMemory *) TidyAlloc( doc->allocator, sizeof(TidyParserMemory) * doc->stack.size * 2 ); memcpy( content, doc->stack.content, sizeof(TidyParserMemory) * (doc->stack.top + 1) ); - TidyFree(doc->stack.allocator, doc->stack.content); + TidyFree(doc->allocator, doc->stack.content); doc->stack.content = content; doc->stack.size = doc->stack.size * 2; @@ -860,12 +858,58 @@ static void growParserStack( TidyDocImpl* doc ) /** * Indicates whether or not the stack is empty. */ -static Bool isEmptyParserStack( TidyDocImpl* doc ) +static inline Bool isEmptyParserStack( TidyDocImpl* doc ) { return doc->stack.top < 0; } +/** + * Peek at the parser memory. + */ +static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc ) +{ + return doc->stack.content[doc->stack.top]; +} + + +/** + * Peek at the parser memory "identity" field. This is just a convenience + * to avoid having to create a new struct instance in the caller. + */ +static inline Parser* peekMemoryIdentity( TidyDocImpl* doc ) +{ + return doc->stack.content[doc->stack.top].identity; +} + + +/** + * Peek at the parser memory "mode" field. This is just a convenience + * to avoid having to create a new struct instance in the caller. + */ +static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc ) +{ + return doc->stack.content[doc->stack.top].mode; +} + + +/** + * Pop out a parser memory. + */ +static TidyParserMemory popMemory( TidyDocImpl* doc ) +{ + if ( !isEmptyParserStack( doc ) ) + { + TidyParserMemory data = doc->stack.content[doc->stack.top]; + DEBUG_LOG(SPRTF("\n<--POP %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top - 1 )); + doc->stack.top = doc->stack.top - 1; + return data; + } + TidyParserMemory blank = { NULL }; + return blank; +} + + /** * Push the parser memory to the stack. */ @@ -875,52 +919,9 @@ static void pushMemory( TidyDocImpl* doc, TidyParserMemory data ) growParserStack( doc ); doc->stack.top++; + doc->stack.content[doc->stack.top] = data; -} - - -/** - * Peek at the parser memory. - */ -static FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc ) -{ - return doc->stack.content[doc->stack.top]; -} - - -/** - * Peek at the parser memory "mode" field. This is just a convenience - * to avoid having to create a new struct instance in the caller. - */ -static GetTokenMode peekMemoryMode( TidyDocImpl* doc ) -{ - return doc->stack.content[doc->stack.top].mode; -} - - -/** - * Peek at the parser memory "identity" field. This is just a convenience - * to avoid having to create a new struct instance in the caller. - */ -static Parser* peekMemoryIdentity( TidyDocImpl* doc ) -{ - return doc->stack.content[doc->stack.top].identity; -} - - -/** - * Pop out a parser memory. - */ -static TidyParserMemory popMemory( TidyDocImpl* doc ) -{ - if ( !isEmptyParserStack( doc ) ) - { - TidyParserMemory data = doc->stack.content[doc->stack.top]; - doc->stack.top = doc->stack.top - 1; - return data; - } - TidyParserMemory blank = { NULL }; - return blank; + DEBUG_LOG(SPRTF("\n-->PUSH %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top )); } @@ -938,7 +939,7 @@ static Parser* GetParserForNode( TidyDocImpl* doc, Node *node ) Lexer* lexer = doc->lexer; /* [i_a]2 prevent crash for active content (php, asp) docs */ - if (node->tag == NULL) + if (!node || node->tag == NULL) return NULL; /* @@ -968,28 +969,16 @@ static Parser* GetParserForNode( TidyDocImpl* doc, Node *node ) /** - * Instantiates the correct parser for the given node. This is currently - * maintained ONLY until the legacy parsers have been ported, as this - * introduces recursion when used. - */ -static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ) -{ - Parser* parser = GetParserForNode( doc, node ); - - if ( parser ) - (*parser)( doc, node, mode, no ); -} - - -/** - * The main parser body will populate the document's document root starting - * with the provided node, which generally should be the HTML node after the - * pre-HTML stuff is handled at a higher level. + * This parser controller initiates the parsing process with the document's + * root starting with the provided node, which should be the HTML node after + * the pre-HTML stuff is handled at a higher level. * - * This parser works cooperatively with compliant parsers to pass state - * information back and forth in the TidyDocImpl's `stack`, which resides on - * the heap and prevents recursion and stack exhaustion, and also works well - * with the old-style parsers that do recurse. + * This controller is responsible for calling each of the individual parsers, + * based on the tokens it pulls from the lexer, or the tokens passed back via + * the parserMemory stack from each of the parsers. Having a main, central + * looping dispatcher in this fashion allows the prevention of recursion. Note, + * though, that some of the parsers are still recursive and have to be + * refactored in order to cooperate with this controller. * * (The goal is to update the old-style parsers slowly and deliberately * without causing regressions, in a series of smaller commits and updates.) @@ -997,76 +986,78 @@ static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ) void ParseHTMLWithNode( TidyDocImpl* doc, Node* node ) { GetTokenMode mode = IgnoreWhitespace; - Parser* parser = NULL; + Parser* parser = GetParserForNode( doc, node ); + Bool something_to_do = yes; /* This main loop is only extinguished when all of the parser tokens are - consumed. Note that most of the parsers consume tokens as well, and - so what we're really doing here is managing parsers and preventing - recursion with cooperating parsers. + consumed. Ideally, EVERY parser will return nodes to this loop for + dispatch to the appropriate parser, but some of the recursive parsers + still consume some tokens on their own. */ - while ( node ) + while (something_to_do) { - if ( (parser = GetParserForNode( doc, node )) ) + node = parser ? parser( doc, node, mode ) : NULL; + + /* + We have a node, so anything deferred was already pushed to the stack + to be dealt with later. + */ + if ( node ) { - if ( (node = parser( doc, node, mode, no )) ) - { - /* - When a parser returns a node, it means that we have - to continue the loop rather than moving on, because it - indicates that the parser encountered a token it does not - handle. It also tells us the correct GetTokenMode to use - for it via the struct that it pushed: - */ - mode = peekMemoryMode( doc ); - continue; - } + parser = GetParserForNode( doc, node ); + continue; } /* - If we've come this far, the parser has bottomed out, and won't be - going any deeper. Now we run back up the stack to close all of the - open elements and handle any parser post-processing that was needed. - Of course, other nodes might cause us to deepen the stack again, too. + We weren't given a node, which means this particular leaf is bottomed + out. We'll re-enter the parsers using information from the stack. */ - if ( !isEmptyParserStack( doc ) ) + if ( !isEmptyParserStack(doc)) { - if ( (parser = peekMemoryIdentity( doc )) ) + parser = peekMemoryIdentity(doc); + if (parser) { - if ( (node = parser( doc, NULL, 0, yes )) ) - { - /* Another assignment from the parser. */ - mode = peekMemoryMode( doc ); - continue; - } - } else { - /* - There's no identity in the stack (it was used to pass back - a GetToken mode, and nothing else, so remove discard it. - */ + continue; + } + else + { + /* No parser means we're only passing back a parsing mode. */ + mode = peekMemoryMode( doc ); popMemory( doc ); } } - + /* - Assuming we've gotten this far, there's no more work to do and - so we can draw a nice, fresh token from the lexer. + At this point, there's nothing being returned from parsers, and + nothing on the stack, so we can draw a new node from the lexer. */ - node = TY_(GetToken)( doc, mode ); + node = TY_(GetToken)( doc, mode ); + DEBUG_LOG(SPRTF("---ParseHTMLWithNode got token %s with mode %u.\n", node ? node->element : NULL, mode)); + + if (node) + parser = GetParserForNode( doc, node ); + else + something_to_do = no; } } /***************************************************************************//* - ** MARK: - Old Parsers + ** MARK: - Parsers ***************************************************************************/ -/** MARK: TY_(oldParseBlock) +/** MARK: TY_(ParseBlock) * `element` is a node created by the lexer upon seeing the start tag, or * by the parser when the start tag is inferred + * + * This is a non-recursing parser. It uses the document's parser memory stack + * to send subsequent nodes back to the controller for dispatching to parsers. + * This parser is also re-enterable, so that post-processing can occur after + * such dispatching. */ -void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) +Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) { #if defined(ENABLE_DEBUG_LOG) static int in_parse_block = 0; @@ -1076,63 +1067,74 @@ void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) Node *node; Bool checkstack = yes; uint istackbase = 0; -#if defined(ENABLE_DEBUG_LOG) - in_parse_block++; - parse_block_cnt++; - SPRTF("Entering ParseBlock %d... %d %s\n",in_parse_block,parse_block_cnt, - ((element && element->element) ? element->element : "")); -#endif - - if ( element->tag->model & CM_EMPTY ) { -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlockL 1 %d...\n",in_parse_block); -#endif - return NULL; + + if ( element == NULL ) + { + TidyParserMemory memory = popMemory( doc ); + node = memory.reentry_node; /* Throwaway, because the loop overwrites this immediately. */ + mode = memory.reentry_mode; + element = memory.original_node; + DEBUG_LOG(SPRTF(">>>Re-Enter ParseBlock with %s\n", node->element)); } + else + { + DEBUG_LOG(SPRTF(">>>Entering ParseBlock %d... %d %s\n",++in_parse_block,++parse_block_cnt, + ((element && element->element) ? element->element : ""))); - if ( nodeIsFORM(element) && - DescendantOf(element, TidyTag_FORM) ) - TY_(Report)(doc, element, NULL, ILLEGAL_NESTING ); + if ( element->tag->model & CM_EMPTY ) + { + DEBUG_LOG(SPRTF("<<tag->model & CM_OBJECT) + { + istackbase = lexer->istackbase; + lexer->istackbase = lexer->istacksize; + } + + if (!(element->tag->model & CM_MIXED)) + TY_(InlineDup)( doc, NULL ); + + /*\ + * Issue #212 - If it is likely that it may be necessary + * to move a leading space into a text node before this + * element, then keep the mode MixedContent to keep any + * leading space + \*/ + if ( !(element->tag->model & CM_INLINE) || + (element->tag->model & CM_FIELD ) ) + { + mode = IgnoreWhitespace; + } + else if (mode == IgnoreWhitespace) + { + /* Issue #212 - Further fix in case ParseBlock() is called with 'IgnoreWhitespace' + when such a leading space may need to be inserted before this element to + preverve the browser view */ + mode = MixedContent; + } + } /* Re-Entering */ + /* - InlineDup() asks the lexer to insert inline emphasis tags - currently pushed on the istack, but take care to avoid - propagating inline emphasis inside OBJECT or APPLET. - For these elements a fresh inline stack context is created - and disposed of upon reaching the end of the element. - They thus behave like table cells in this respect. - */ - if (element->tag->model & CM_OBJECT) - { - istackbase = lexer->istackbase; - lexer->istackbase = lexer->istacksize; - } - - if (!(element->tag->model & CM_MIXED)) - TY_(InlineDup)( doc, NULL ); - - /*\ - * Issue #212 - If it is likely that it may be necessary - * to move a leading space into a text node before this - * element, then keep the mode MixedContent to keep any - * leading space - \*/ - if ( !(element->tag->model & CM_INLINE) || - (element->tag->model & CM_FIELD ) ) - { - mode = IgnoreWhitespace; - } - else if (mode == IgnoreWhitespace) - { - /* Issue #212 - Further fix in case ParseBlock() is called with 'IgnoreWhitespace' - when such a leading space may need to be inserted before this element to - preverve the browser view */ - mode = MixedContent; - } - + Main Loop + */ + while ((node = TY_(GetToken)(doc, mode /*MixedContent*/)) != NULL) { + DEBUG_LOG(SPRTF("---ParseBlock got token %s with mode %u\n", node->element, IgnoreWhitespace)); /* end tag for this element */ if (node->type == EndTag && node->tag && (node->tag == element->tag || element->was == node->tag)) @@ -1149,10 +1151,7 @@ void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) element->closed = yes; TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<istackbase = istackbase; TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 4 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<parent->tag->parser == TY_(ParseList) ) { TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 5 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<parent) ) { TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 6 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<exiled) { -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 7 %d...\n",in_parse_block); -#endif + if (lexer->exiled) + { + DEBUG_LOG(SPRTF("<<istackbase = istackbase; TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 8 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<element)); + } + return node; } /* discard unexpected tags */ @@ -1614,2189 +1596,53 @@ void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 10 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<lexer; - Node *node, *parent; -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline++; - SPRTF("Entering ParseInline %d...\n",in_parse_inline); -#endif - - if (element->tag->model & CM_EMPTY) { -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 1 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* - ParseInline is used for some block level elements like H1 to H6 - For such elements we need to insert inline emphasis tags currently - on the inline stack. For Inline elements, we normally push them - onto the inline stack provided they aren't implicit or OBJECT/APPLET. - This test is carried out in PushInline and PopInline, see istack.c - - InlineDup(...) is not called for elements with a CM_MIXED (inline and - block) content model, e.g. or , otherwise constructs like - -

111222333444555

-

111222333444555

-

111222333444555

- - will get corrupted. - */ - if ((TY_(nodeHasCM)(element, CM_BLOCK) || nodeIsDT(element)) && - !TY_(nodeHasCM)(element, CM_MIXED)) - TY_(InlineDup)(doc, NULL); - else if (TY_(nodeHasCM)(element, CM_INLINE)) - TY_(PushInline)(doc, element); - - if ( nodeIsNOBR(element) ) - doc->badLayout |= USING_NOBR; - else if ( nodeIsFONT(element) ) - doc->badLayout |= USING_FONT; - - /* Inline elements may or may not be within a preformatted element */ - if (mode != Preformatted) - mode = MixedContent; - - while ((node = TY_(GetToken)(doc, mode)) != NULL) - { - /* end tag for current element */ - if (node->tag == element->tag && node->type == EndTag) - { - if (element->tag->model & CM_INLINE) - TY_(PopInline)( doc, node ); - - TY_(FreeNode)( doc, node ); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - - /* - if a font element wraps an anchor and nothing else - then move the font element inside the anchor since - otherwise it won't alter the anchor text color - */ - if ( nodeIsFONT(element) && - element->content && element->content == element->last ) - { - Node *child = element->content; - - if ( nodeIsA(child) ) - { - child->parent = element->parent; - child->next = element->next; - child->prev = element->prev; - - element->next = NULL; - element->prev = NULL; - element->parent = child; - - element->content = child->content; - element->last = child->last; - child->content = element; - - TY_(FixNodeLinks)(child); - TY_(FixNodeLinks)(element); - } - } - - element->closed = yes; - TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 2 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* ... map 2nd to if 1st is explicit */ - /* (see additional conditions below) */ - /* otherwise emphasis nesting is probably unintentional */ - /* big, small, sub, sup have cumulative effect to leave them alone */ - if ( node->type == StartTag - && node->tag == element->tag - && TY_(IsPushed)( doc, node ) - && !node->implicit - && !element->implicit - && node->tag && (node->tag->model & CM_INLINE) - && !nodeIsA(node) - && !nodeIsFONT(node) - && !nodeIsBIG(node) - && !nodeIsSMALL(node) - && !nodeIsSUB(node) - && !nodeIsSUP(node) - && !nodeIsQ(node) - && !nodeIsSPAN(node) - && cfgBool(doc, TidyCoerceEndTags) - ) - { - /* proceeds only if "node" does not have any attribute and - follows a text node not finishing with a space */ - if (element->content != NULL && node->attributes == NULL - && TY_(nodeIsText)(element->last) - && !TY_(TextNodeEndWithSpace)(doc->lexer, element->last) ) - { - TY_(Report)(doc, element, node, COERCE_TO_ENDTAG); - node->type = EndTag; - TY_(UngetToken)(doc); - continue; - } - - if (node->attributes == NULL || element->attributes == NULL) - TY_(Report)(doc, element, node, NESTED_EMPHASIS); - } - else if ( TY_(IsPushed)(doc, node) && node->type == StartTag && - nodeIsQ(node) ) - { - /*\ - * Issue #215 - such nested quotes are NOT a problem if HTML5, so - * only issue this warning if NOT HTML5 mode. - \*/ - if (TY_(HTMLVersion)(doc) != HT50) - { - TY_(Report)(doc, element, node, NESTED_QUOTATION); - } - } - - if ( TY_(nodeIsText)(node) ) - { - /* only called for 1st child */ - if ( element->content == NULL && !(mode & Preformatted) ) - TrimSpaces( doc, element ); - - if ( node->start >= node->end ) - { - TY_(FreeNode)( doc, node ); - continue; - } - - TY_(InsertNodeAtEnd)(element, node); - continue; - } - - /* mixed content model so allow text */ - if (InsertMisc(element, node)) - continue; - - /* deal with HTML tags */ - if ( nodeIsHTML(node) ) - { - if ( TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED ); - TY_(FreeNode)( doc, node ); - continue; - } - - /* otherwise infer end of inline element */ - TY_(UngetToken)( doc ); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 3 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* within
or
 map 

to
*/ - if ( nodeIsP(node) && - node->type == StartTag && - ( (mode & Preformatted) || - nodeIsDT(element) || - DescendantOf(element, TidyTag_DT ) - ) - ) - { - node->tag = TY_(LookupTagDef)( TidyTag_BR ); - TidyDocFree(doc, node->element); - node->element = TY_(tmbstrdup)(doc->allocator, "br"); - TrimSpaces(doc, element); - TY_(InsertNodeAtEnd)(element, node); - continue; - } - - /*

allowed within

in HTML 4.01 Transitional */ - if ( nodeIsP(node) && - node->type == StartTag && - nodeIsADDRESS(element) ) - { - TY_(ConstrainVersion)( doc, ~VERS_HTML40_STRICT ); - TY_(InsertNodeAtEnd)(element, node); - (*node->tag->parser)( doc, node, mode, no ); - continue; - } - - /* ignore unknown and PARAM tags */ - if ( node->tag == NULL || nodeIsPARAM(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - continue; - } - - if ( nodeIsBR(node) && node->type == EndTag ) - node->type = StartTag; - - if ( node->type == EndTag ) - { - /* coerce
to
*/ - if ( nodeIsBR(node) ) - node->type = StartTag; - else if ( nodeIsP(node) ) - { - /* coerce unmatched

to

*/ - if ( !DescendantOf(element, TidyTag_P) ) - { - TY_(CoerceNode)(doc, node, TidyTag_BR, no, no); - TrimSpaces( doc, element ); - TY_(InsertNodeAtEnd)( element, node ); - node = TY_(InferredTag)(doc, TidyTag_BR); - TY_(InsertNodeAtEnd)( element, node ); /* todo: check this */ - continue; - } - } - else if ( TY_(nodeHasCM)(node, CM_INLINE) - && !nodeIsA(node) - && !TY_(nodeHasCM)(node, CM_OBJECT) - && TY_(nodeHasCM)(element, CM_INLINE) ) - { - /* allow any inline end tag to end current element */ - - /* http://tidy.sf.net/issue/1426419 */ - /* but, like the browser, retain an earlier inline element. - This is implemented by setting the lexer into a mode - where it gets tokens from the inline stack rather than - from the input stream. Check if the scenerio fits. */ - if ( !nodeIsA(element) - && (node->tag != element->tag) - && TY_(IsPushed)( doc, node ) - && TY_(IsPushed)( doc, element ) ) - { - /* we have something like - bold bold and italic italics */ - if ( TY_(SwitchInline)( doc, element, node ) ) - { - TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG); - TY_(UngetToken)( doc ); /* put this back */ - TY_(InlineDup1)( doc, NULL, element ); /* dupe the , after */ - if (!(mode & Preformatted)) - TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 4 %d...\n",in_parse_inline); -#endif - return NULL; /* close , but will re-open it, after */ - } - } - TY_(PopInline)( doc, element ); - - if ( !nodeIsA(element) ) - { - if ( nodeIsA(node) && node->tag != element->tag ) - { - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE ); - TY_(UngetToken)( doc ); - } - else - { - TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG); - TY_(FreeNode)( doc, node); - } - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 5 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* if parent is then discard unexpected inline end tag */ - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } /* special case etc. for stuff moved in front of table */ - else if ( lexer->exiled - && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) ) - { - TY_(UngetToken)( doc ); - TrimSpaces(doc, element); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 6 %d...\n",in_parse_inline); -#endif - return NULL; - } - } - - /* allow any header tag to end current header */ - if ( TY_(nodeHasCM)(node, CM_HEADING) && TY_(nodeHasCM)(element, CM_HEADING) ) - { - - if ( node->tag == element->tag ) - { - TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG ); - TY_(FreeNode)( doc, node); - } - else - { - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE ); - TY_(UngetToken)( doc ); - } - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 7 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* - an tag to ends any open element - but is mapped to - */ - /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */ - /* if (node->tag == doc->tags.tag_a && !node->implicit && TY_(IsPushed)(doc, node)) */ - if ( nodeIsA(node) && !node->implicit && - (nodeIsA(element) || DescendantOf(element, TidyTag_A)) ) - { - /* coerce to unless it has some attributes */ - /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */ - /* other fixes by Dave Raggett */ - /* if (node->attributes == NULL) */ - if (node->type != EndTag && node->attributes == NULL - && cfgBool(doc, TidyCoerceEndTags) ) - { - node->type = EndTag; - TY_(Report)(doc, element, node, COERCE_TO_ENDTAG); - /* TY_(PopInline)( doc, node ); */ - TY_(UngetToken)( doc ); - continue; - } - - TY_(UngetToken)( doc ); - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE); - /* TY_(PopInline)( doc, element ); */ - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 8 %d...\n",in_parse_inline); -#endif - return NULL; - } - - if (element->tag->model & CM_HEADING) - { - if ( nodeIsCENTER(node) || nodeIsDIV(node) ) - { - if (!TY_(nodeIsElement)(node)) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN); - - /* insert center as parent if heading is empty */ - if (element->content == NULL) - { - InsertNodeAsParent(element, node); - continue; - } - - /* split heading and make center parent of 2nd part */ - TY_(InsertNodeAfterElement)(element, node); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - - element = TY_(CloneNode)( doc, element ); - TY_(InsertNodeAtEnd)(node, element); - continue; - } - - if ( nodeIsHR(node) ) - { - if ( !TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN); - - /* insert hr before heading if heading is empty */ - if (element->content == NULL) - { - TY_(InsertNodeBeforeElement)(element, node); - continue; - } - - /* split heading and insert hr before 2nd part */ - TY_(InsertNodeAfterElement)(element, node); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - - element = TY_(CloneNode)( doc, element ); - TY_(InsertNodeAfterElement)(node, element); - continue; - } - } - - if ( nodeIsDT(element) ) - { - if ( nodeIsHR(node) ) - { - Node *dd; - if ( !TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN); - dd = TY_(InferredTag)(doc, TidyTag_DD); - - /* insert hr within dd before dt if dt is empty */ - if (element->content == NULL) - { - TY_(InsertNodeBeforeElement)(element, dd); - TY_(InsertNodeAtEnd)(dd, node); - continue; - } - - /* split dt and insert hr within dd before 2nd part */ - TY_(InsertNodeAfterElement)(element, dd); - TY_(InsertNodeAtEnd)(dd, node); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - - element = TY_(CloneNode)( doc, element ); - TY_(InsertNodeAfterElement)(dd, element); - continue; - } - } - - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - for (parent = element->parent; - parent != NULL; parent = parent->parent) - { - if (node->tag == parent->tag) - { - if (!(element->tag->model & CM_OPT) && !element->implicit) - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE); - - if( TY_(IsPushedLast)( doc, element, node ) ) - TY_(PopInline)( doc, element ); - TY_(UngetToken)( doc ); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 9 %d...\n",in_parse_inline); -#endif - return NULL; - } - } - } - - /*\ - * block level tags end this element - * Issue #333 - There seems an exception if the element is a 'span', - * and the node just collected is a 'meta'. The 'meta' can not have - * CM_INLINE added, nor can the 'span' have CM_MIXED added without - * big consequences. - * There may be other exceptions to be added... - \*/ - if (!(node->tag->model & CM_INLINE) && - !(element->tag->model & CM_MIXED) && - !(nodeIsSPAN(element) && nodeIsMETA(node)) ) - { - if ( !TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - /* HTML5 */ - if (nodeIsDATALIST(element)) { - TY_(ConstrainVersion)( doc, ~VERS_HTML5 ); - } else - if (!(element->tag->model & CM_OPT)) - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE); - - if (node->tag->model & CM_HEAD && !(node->tag->model & CM_BLOCK)) - { - MoveToHead(doc, element, node); - continue; - } - - /* - prevent anchors from propagating into block tags - except for headings h1 to h6 - */ - if ( nodeIsA(element) ) - { - if (node->tag && !(node->tag->model & CM_HEADING)) - TY_(PopInline)( doc, element ); - else if (!(element->content)) - { - TY_(DiscardElement)( doc, element ); - TY_(UngetToken)( doc ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 10 %d...\n",in_parse_inline); -#endif - return NULL; - } - } - - TY_(UngetToken)( doc ); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 11 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* parse inline element */ - if (TY_(nodeIsElement)(node)) - { - if (node->implicit) - TY_(Report)(doc, element, node, INSERTING_TAG); - - /* trim white space before
*/ - if ( nodeIsBR(node) ) - TrimSpaces(doc, element); - - TY_(InsertNodeAtEnd)(element, node); - ParseTag(doc, node, mode); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - continue; - } - - if (!(element->tag->model & CM_OPT)) - TY_(Report)(doc, element, node, MISSING_ENDTAG_FOR); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 12 %d...\n",in_parse_inline); -#endif - return NULL; -} - - -/** MARK: TY_(oldParseDefList) - * Parses the `dl` tag. - */ -void* TY_(oldParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode) -{ - Lexer* lexer = doc->lexer; - Node *node, *parent; - - if (list->tag->model & CM_EMPTY) - return NULL; - - lexer->insert = NULL; /* defer implicit inline start tags */ - - while ((node = TY_(GetToken)( doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == list->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - list->closed = yes; - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(list, node)) - continue; - - if (TY_(nodeIsText)(node)) - { - TY_(UngetToken)( doc ); - node = TY_(InferredTag)(doc, TidyTag_DT); - TY_(Report)(doc, list, node, MISSING_STARTTAG); - } - - if (node->tag == NULL) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - Bool discardIt = no; - if ( nodeIsFORM(node) ) - { - BadForm( doc ); - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - continue; - } - - for (parent = list->parent; - parent != NULL; parent = parent->parent) - { - /* Do not match across BODY to avoid infinite loop - between ParseBody and this parser, - See http://tidy.sf.net/bug/1098012. */ - if (nodeIsBODY(parent)) - { - discardIt = yes; - break; - } - if (node->tag == parent->tag) - { - TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE); - - TY_(UngetToken)( doc ); - return NULL; - } - } - if (discardIt) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - } - - /* center in a dt or a dl breaks the dl list in two */ - if ( nodeIsCENTER(node) ) - { - if (list->content) - TY_(InsertNodeAfterElement)(list, node); - else /* trim empty dl list */ - { - TY_(InsertNodeBeforeElement)(list, node); - } - - /* #426885 - fix by Glenn Carroll 19 Apr 00, and - Gary Dechaines 11 Aug 00 */ - /* ParseTag can destroy node, if it finds that - * this
is followed immediately by
. - * It's awkward but necessary to determine if this - * has happened. - */ - parent = node->parent; - - /* and parse contents of center */ - lexer->excludeBlocks = no; - ParseTag( doc, node, mode); - lexer->excludeBlocks = yes; - - /* now create a new dl element, - * unless node has been blown away because the - * center was empty, as above. - */ - if (parent && parent->last == node) - { - list = TY_(InferredTag)(doc, TidyTag_DL); - TY_(InsertNodeAfterElement)(node, list); - } - continue; - } - - if ( !(nodeIsDT(node) || nodeIsDD(node)) ) - { - TY_(UngetToken)( doc ); - - if (!(node->tag->model & (CM_BLOCK | CM_INLINE))) - { - TY_(Report)(doc, list, node, TAG_NOT_ALLOWED_IN); - return NULL; - } - - /* if DD appeared directly in BODY then exclude blocks */ - if (!(node->tag->model & CM_INLINE) && lexer->excludeBlocks) - return NULL; - - node = TY_(InferredTag)(doc, TidyTag_DD); - TY_(Report)(doc, list, node, MISSING_STARTTAG); - } - - if (node->type == EndTag) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* node should be
or
*/ - TY_(InsertNodeAtEnd)(list, node); - ParseTag( doc, node, IgnoreWhitespace); - } - - TY_(Report)(doc, list, node, MISSING_ENDTAG_FOR); - return NULL; -} - - -/** MARK: TY_(oldParseList) - * Parses list tags. - */ -void* TY_(oldParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode)) -{ -#if defined(ENABLE_DEBUG_LOG) - static int in_parse_list = 0; -#endif - Lexer* lexer = doc->lexer; - Node *node, *parent, *lastli; - Bool wasblock; - Bool nodeisOL = nodeIsOL(list); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_list++; - SPRTF("Entering ParseList %d...\n",in_parse_list); -#endif - if (list->tag->model & CM_EMPTY) - { -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 1 %d... CM_EMPTY\n",in_parse_list); -#endif - return NULL; - } - lexer->insert = NULL; /* defer implicit inline start tags */ - - while ((node = TY_(GetToken)( doc, IgnoreWhitespace)) != NULL) - { - Bool foundLI = no; - if (node->tag == list->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - list->closed = yes; -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 2 %d... Endtag\n",in_parse_list); -#endif - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(list, node)) - continue; - - if (node->type != TextNode && node->tag == NULL) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - if (lexer && (node->type == TextNode)) - { - uint ch, ix = node->start; - /* Issue #572 - Skip whitespace. */ - while (ix < node->end && (ch = (lexer->lexbuf[ix] & 0xff)) - && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) - ++ix; - if (ix >= node->end) - { - /* Issue #572 - Discard if ALL whitespace. */ - TY_(FreeNode)(doc, node); - continue; - } - } - - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - if ( nodeIsFORM(node) ) - { - BadForm( doc ); - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - continue; - } - - if (TY_(nodeHasCM)(node,CM_INLINE)) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(PopInline)( doc, node ); - TY_(FreeNode)( doc, node); - continue; - } - - for ( parent = list->parent; - parent != NULL; parent = parent->parent ) - { - /* Do not match across BODY to avoid infinite loop - between ParseBody and this parser, - See http://tidy.sf.net/bug/1053626. */ - if (nodeIsBODY(parent)) - break; - if (node->tag == parent->tag) - { - TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE); - TY_(UngetToken)( doc ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 3 %d... No End Tag\n",in_parse_list); -#endif - return NULL; - } - } - - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( !nodeIsLI(node) && nodeisOL ) - { - /* Issue #572 - A
  1. can have nested
      elements */ - foundLI = FindLastLI(list, &lastli); /* find last
    1. */ - } - - if ( nodeIsLI(node) || (TY_(IsHTML5Mode)(doc) && !foundLI) ) - { - /* node is
    2. OR - Issue #396 - A
        can have Zero or more
      • elements - */ - TY_(InsertNodeAtEnd)(list,node); - } - else - { - TY_(UngetToken)( doc ); - - if (TY_(nodeHasCM)(node,CM_BLOCK) && lexer->excludeBlocks) - { - TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE); -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 4 %d... No End Tag\n",in_parse_list); -#endif - return NULL; - } - /* http://tidy.sf.net/issue/1316307 */ - /* In exiled mode, return so table processing can continue. */ - else if ( lexer->exiled - && (TY_(nodeHasCM)(node, CM_TABLE|CM_ROWGRP|CM_ROW) - || nodeIsTABLE(node)) ) - { -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 5 %d... exiled\n",in_parse_list); -#endif - return NULL; - } - /* http://tidy.sf.net/issue/836462 - If "list" is an unordered list, insert the next tag within - the last
      • to preserve the numbering to match the visual - rendering of most browsers. */ - if ( nodeIsOL(list) && FindLastLI(list, &lastli) ) - { - /* Create a node for error reporting */ - node = TY_(InferredTag)(doc, TidyTag_LI); - TY_(Report)(doc, list, node, MISSING_STARTTAG ); - TY_(FreeNode)( doc, node); - node = lastli; - } - else - { - /* Add an inferred
      • */ - wasblock = TY_(nodeHasCM)(node,CM_BLOCK); - node = TY_(InferredTag)(doc, TidyTag_LI); - /* Add "display: inline" to avoid a blank line after
      • with - Internet Explorer. See http://tidy.sf.net/issue/836462 */ - TY_(AddStyleProperty)( doc, node, - wasblock - ? "list-style: none; display: inline" - : "list-style: none" - ); - TY_(Report)(doc, list, node, MISSING_STARTTAG ); - TY_(InsertNodeAtEnd)(list,node); - } - } - - ParseTag( doc, node, IgnoreWhitespace); - } - - TY_(Report)(doc, list, node, MISSING_ENDTAG_FOR); -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 6 %d... missing end tag\n",in_parse_list); -#endif - return NULL; -} - - -/** MARK: TY_(oldParseRow) - * Parses the `row` tag. - */ -void* TY_(oldParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)) -{ - Lexer* lexer = doc->lexer; - Node *node; - Bool exclude_state; - - if (row->tag->model & CM_EMPTY) - return NULL; - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == row->tag) - { - if (node->type == EndTag) - { - TY_(FreeNode)( doc, node); - row->closed = yes; - FixEmptyRow( doc, row); - return NULL; - } - - /* New row start implies end of current row */ - TY_(UngetToken)( doc ); - FixEmptyRow( doc, row); - return NULL; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if ( node->type == EndTag ) - { - if ( (TY_(nodeHasCM)(node, CM_HTML|CM_TABLE) || nodeIsTABLE(node)) - && DescendantOf(row, TagId(node)) ) - { - TY_(UngetToken)( doc ); - return NULL; - } - - if ( nodeIsFORM(node) || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) ) - { - if ( nodeIsFORM(node) ) - BadForm( doc ); - - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( nodeIsTD(node) || nodeIsTH(node) ) - { - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - } - - /* deal with comments etc. */ - if (InsertMisc(row, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL && node->type != TextNode) - { - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* discard unexpected
element */ - if ( nodeIsTABLE(node) ) - { - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* THEAD, TFOOT or TBODY */ - if ( TY_(nodeHasCM)(node, CM_ROWGRP) ) - { - TY_(UngetToken)( doc ); - return NULL; - } - - if (node->type == EndTag) - { - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* - if text or inline or block move before table - if head content move to head - */ - - if (node->type != EndTag) - { - if ( nodeIsFORM(node) ) - { - TY_(UngetToken)( doc ); - node = TY_(InferredTag)(doc, TidyTag_TD); - TY_(Report)(doc, row, node, MISSING_STARTTAG); - } - else if ( TY_(nodeIsText)(node) - || TY_(nodeHasCM)(node, CM_BLOCK | CM_INLINE) ) - { - MoveBeforeTable( doc, row, node ); - TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN); - lexer->exiled = yes; - exclude_state = lexer->excludeBlocks; - lexer->excludeBlocks = no; - - if (node->type != TextNode) - ParseTag( doc, node, IgnoreWhitespace); - - lexer->exiled = no; - lexer->excludeBlocks = exclude_state; - continue; - } - else if (node->tag->model & CM_HEAD) - { - TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN); - MoveToHead( doc, row, node); - continue; - } - } - - if ( !(nodeIsTD(node) || nodeIsTH(node)) ) - { - TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN); - TY_(FreeNode)( doc, node); - continue; - } - - /* node should be
or */ - TY_(InsertNodeAtEnd)(row, node); - exclude_state = lexer->excludeBlocks; - lexer->excludeBlocks = no; - ParseTag( doc, node, IgnoreWhitespace); - lexer->excludeBlocks = exclude_state; - - /* pop inline stack */ - - while ( lexer->istacksize > lexer->istackbase ) - TY_(PopInline)( doc, NULL ); - } - return NULL; -} - - -/** MARK: TY_(oldParseRowGroup) - * Parses the `rowgroup` tag. - */ -void* TY_(oldParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSED(mode)) -{ - Lexer* lexer = doc->lexer; - Node *node, *parent; - - if (rowgroup->tag->model & CM_EMPTY) - return NULL; - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == rowgroup->tag) - { - if (node->type == EndTag) - { - rowgroup->closed = yes; - TY_(FreeNode)( doc, node); - return NULL; - } - - TY_(UngetToken)( doc ); - return NULL; - } - - /* if
infer end tag */ - if ( nodeIsTABLE(node) && node->type == EndTag ) - { - TY_(UngetToken)( doc ); - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(rowgroup, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL && node->type != TextNode) - { - TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* - if TD or TH then infer - if text or inline or block move before table - if head content move to head - */ - - if (node->type != EndTag) - { - if ( nodeIsTD(node) || nodeIsTH(node) ) - { - TY_(UngetToken)( doc ); - node = TY_(InferredTag)(doc, TidyTag_TR); - TY_(Report)(doc, rowgroup, node, MISSING_STARTTAG); - } - else if ( TY_(nodeIsText)(node) - || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) ) - { - MoveBeforeTable( doc, rowgroup, node ); - TY_(Report)(doc, rowgroup, node, TAG_NOT_ALLOWED_IN); - lexer->exiled = yes; - - if (node->type != TextNode) - ParseTag(doc, node, IgnoreWhitespace); - - lexer->exiled = no; - continue; - } - else if (node->tag->model & CM_HEAD) - { - TY_(Report)(doc, rowgroup, node, TAG_NOT_ALLOWED_IN); - MoveToHead(doc, rowgroup, node); - continue; - } - } - - /* - if this is the end tag for ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - if ( nodeIsFORM(node) || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) ) - { - if ( nodeIsFORM(node) ) - BadForm( doc ); - - TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( nodeIsTR(node) || nodeIsTD(node) || nodeIsTH(node) ) - { - TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - for ( parent = rowgroup->parent; - parent != NULL; - parent = parent->parent ) - { - if (node->tag == parent->tag) - { - TY_(UngetToken)( doc ); - return NULL; - } - } - } - - /* - if THEAD, TFOOT or TBODY then implied end tag - - */ - if (node->tag->model & CM_ROWGRP) - { - if (node->type != EndTag) - { - TY_(UngetToken)( doc ); - return NULL; - } - } - - if (node->type == EndTag) - { - TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( !nodeIsTR(node) ) - { - node = TY_(InferredTag)(doc, TidyTag_TR); - TY_(Report)(doc, rowgroup, node, MISSING_STARTTAG); - TY_(UngetToken)( doc ); - } - - /* node should be */ - TY_(InsertNodeAtEnd)(rowgroup, node); - ParseTag(doc, node, IgnoreWhitespace); - } - return NULL; -} - - -/** MARK: TY_(oldParseColGroup) - * Parses the `colgroup` tag. - */ -void* TY_(oldParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSED(mode)) -{ - Node *node, *parent; - - if (colgroup->tag->model & CM_EMPTY) - return NULL; - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == colgroup->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - colgroup->closed = yes; - return NULL; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - if ( nodeIsFORM(node) ) - { - BadForm( doc ); - TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - for ( parent = colgroup->parent; - parent != NULL; - parent = parent->parent ) - { - if (node->tag == parent->tag) - { - TY_(UngetToken)( doc ); - return NULL; - } - } - } - - if (TY_(nodeIsText)(node)) - { - TY_(UngetToken)( doc ); - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(colgroup, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL) - { - TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( !nodeIsCOL(node) ) - { - TY_(UngetToken)( doc ); - return NULL; - } - - if (node->type == EndTag) - { - TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* node should be */ - TY_(InsertNodeAtEnd)(colgroup, node); - ParseTag(doc, node, IgnoreWhitespace); - } - return NULL; -} - - -/** MARK: TY_(oldParseTableTag) - * Parses the `table` tag. - */ -void* TY_(oldParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(mode)) -{ -#if defined(ENABLE_DEBUG_LOG) - static int in_parse_table = 0; -#endif - Lexer* lexer = doc->lexer; - Node *node, *parent; - uint istackbase; - - TY_(DeferDup)( doc ); - istackbase = lexer->istackbase; - lexer->istackbase = lexer->istacksize; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table++; - SPRTF("Entering ParseTableTag %d...\n",in_parse_table); -#endif - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == table->tag ) - { - if (node->type == EndTag) - { - TY_(FreeNode)(doc, node); - } - else - { - /* Issue #498 - If a in a
- * just close the current table, and issue a - * warning. The previous action was to discard - * this second
- */ - TY_(UngetToken)(doc); - TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN); - } - lexer->istackbase = istackbase; - table->closed = yes; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table--; - SPRTF("Exit ParseTableTag 1 %d... EndTag\n",in_parse_table); -#endif - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(table, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL && node->type != TextNode) - { - TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* if TD or TH or text or inline or block then infer */ - - if (node->type != EndTag) - { - if ( nodeIsTD(node) || nodeIsTH(node) || nodeIsTABLE(node) ) - { - TY_(UngetToken)( doc ); - node = TY_(InferredTag)(doc, TidyTag_TR); - TY_(Report)(doc, table, node, MISSING_STARTTAG); - } - else if ( TY_(nodeIsText)(node) ||TY_(nodeHasCM)(node,CM_BLOCK|CM_INLINE) ) - { - TY_(InsertNodeBeforeElement)(table, node); - TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN); - lexer->exiled = yes; - - if (node->type != TextNode) - ParseTag(doc, node, IgnoreWhitespace); - - lexer->exiled = no; - continue; - } - else if (node->tag->model & CM_HEAD) - { - MoveToHead(doc, table, node); - continue; - } - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - if ( nodeIsFORM(node) ) - { - BadForm( doc ); - TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* best to discard unexpected block/inline end tags */ - if ( TY_(nodeHasCM)(node, CM_TABLE|CM_ROW) || - TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) ) - { - TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - for ( parent = table->parent; - parent != NULL; - parent = parent->parent ) - { - if (node->tag == parent->tag) - { - TY_(Report)(doc, table, node, MISSING_ENDTAG_BEFORE ); - TY_(UngetToken)( doc ); - lexer->istackbase = istackbase; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table--; - SPRTF("Exit ParseTableTag 2 %d... missing EndTag\n",in_parse_table); -#endif - return NULL; - } - } - } - - if (!(node->tag->model & CM_TABLE)) - { - TY_(UngetToken)( doc ); - TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN); - lexer->istackbase = istackbase; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table--; - SPRTF("Exit ParseTableTag 3 %d... CM_TABLE\n",in_parse_table); -#endif - return NULL; - } - - if (TY_(nodeIsElement)(node)) - { - TY_(InsertNodeAtEnd)(table, node); - ParseTag(doc, node, IgnoreWhitespace); - continue; - } - - /* discard unexpected text nodes and end tags */ - TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - - TY_(Report)(doc, table, node, MISSING_ENDTAG_FOR); - lexer->istackbase = istackbase; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table--; - SPRTF("Exit ParseTableTag 4 %d... missing end\n",in_parse_table); -#endif - return NULL; -} - - -/** MARK: TY_(oldParsePre) - * Parses the `pre` tag. - */ -void* TY_(oldParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) ) -{ - Node *node; - - if (pre->tag->model & CM_EMPTY) - return NULL; - - TY_(InlineDup)( doc, NULL ); /* tell lexer to insert inlines if needed */ - - while ((node = TY_(GetToken)(doc, Preformatted)) != NULL) - { - if ( node->type == EndTag && - (node->tag == pre->tag || DescendantOf(pre, TagId(node))) ) - { - if (nodeIsBODY(node) || nodeIsHTML(node)) - { - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)(doc, node); - continue; - } - if (node->tag == pre->tag) - { - TY_(FreeNode)(doc, node); - } - else - { - TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE ); - TY_(UngetToken)( doc ); - } - pre->closed = yes; - TrimSpaces(doc, pre); - return NULL; - } - - if (TY_(nodeIsText)(node)) - { - TY_(InsertNodeAtEnd)(pre, node); - continue; - } - - /* deal with comments etc. */ - if (InsertMisc(pre, node)) - continue; - - if (node->tag == NULL) - { - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)(doc, node); - continue; - } - - /* strip unexpected tags */ - if ( !PreContent(doc, node) ) - { - Node *newnode; - - /* fix for http://tidy.sf.net/bug/772205 */ - if (node->type == EndTag) - { - /* http://tidy.sf.net/issue/1590220 */ - if ( doc->lexer->exiled - && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) ) - { - TY_(UngetToken)(doc); - TrimSpaces(doc, pre); - return NULL; - } - - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)(doc, node); - continue; - } - /* http://tidy.sf.net/issue/1590220 */ - else if (TY_(nodeHasCM)(node, CM_TABLE|CM_ROW) - || nodeIsTABLE(node) ) - { - if (!doc->lexer->exiled) - /* No missing close warning if exiled. */ - TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE); - - TY_(UngetToken)(doc); - return NULL; - } - - /* - This is basically what Tidy 04 August 2000 did and far more accurate - with respect to browser behaivour than the code commented out above. - Tidy could try to propagate the
 into each disallowed child where
-              
 is allowed in order to replicate some browsers behaivour, but
-              there are a lot of exceptions, e.g. Internet Explorer does not propagate
-              
 into table cells while Mozilla does. Opera 6 never propagates
-              
 into blocklevel elements while Opera 7 behaves much like Mozilla.
-
-              Tidy behaves thus mostly like Opera 6 except for nested 
 elements
-              which are handled like Mozilla takes them (Opera6 closes all 
 after
-              the first 
). - - There are similar issues like replacing

in

 with 
, for - example - -
...

...

(Input) -
...
...
(Tidy) -
...
...
(Opera 7 and Internet Explorer) -
...

...
(Opera 6 and Mozilla) - -
...

...

...
(Input) -
...
......
(Tidy, BUG!) -
...
...
...
(Internet Explorer) -
...

...

...
(Mozilla, Opera 6) -
...
...

...
(Opera 7) - - or something similar, they could also be closing the
 and propagate
-              the 
 into the newly opened 

. - - Todo: IMG, OBJECT, APPLET, BIG, SMALL, SUB, SUP, FONT, and BASEFONT are - disallowed in

, Tidy neither detects this nor does it perform any
-              cleanup operation. Tidy should at least issue a warning if it encounters
-              such constructs.
-
-              Todo: discarding 

is abviously a bug, it should be replaced by
. - */ - TY_(InsertNodeAfterElement)(pre, node); - TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE); - ParseTag(doc, node, IgnoreWhitespace); - - newnode = TY_(InferredTag)(doc, TidyTag_PRE); - TY_(Report)(doc, pre, newnode, INSERTING_TAG); - pre = newnode; - TY_(InsertNodeAfterElement)(node, pre); - - continue; - } - - if ( nodeIsP(node) ) - { - if (node->type == StartTag) - { - TY_(Report)(doc, pre, node, USING_BR_INPLACE_OF); - - /* trim white space before

in

*/
-                TrimSpaces(doc, pre);
-
-                /* coerce both 

and

to
*/ - TY_(CoerceNode)(doc, node, TidyTag_BR, no, no); - TY_(FreeAttrs)( doc, node ); /* discard align attribute etc. */ - TY_(InsertNodeAtEnd)( pre, node ); - } - else - { - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - continue; - } - - if ( TY_(nodeIsElement)(node) ) - { - /* trim white space before
*/ - if ( nodeIsBR(node) ) - TrimSpaces(doc, pre); - - TY_(InsertNodeAtEnd)(pre, node); - ParseTag(doc, node, Preformatted); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - - TY_(Report)(doc, pre, node, MISSING_ENDTAG_FOR); - return NULL; -} - - -/** MARK: TY_(oldParseOptGroup) - * Parses the `optgroup` tag. - */ -void* TY_(oldParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode)) -{ - Lexer* lexer = doc->lexer; - Node *node; - - lexer->insert = NULL; /* defer implicit inline start tags */ - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == field->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - field->closed = yes; - TrimSpaces(doc, field); - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(field, node)) - continue; - - if ( node->type == StartTag && - (nodeIsOPTION(node) || nodeIsOPTGROUP(node)) ) - { - if ( nodeIsOPTGROUP(node) ) - TY_(Report)(doc, field, node, CANT_BE_NESTED); - - TY_(InsertNodeAtEnd)(field, node); - ParseTag(doc, node, MixedContent); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED ); - TY_(FreeNode)( doc, node); - } - return NULL; -} - - -/** MARK: TY_(oldParseSelect) - * Parses the `select` tag. - */ -void* TY_(oldParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode)) -{ -#if defined(ENABLE_DEBUG_LOG) - static int in_parse_select = 0; -#endif - Lexer* lexer = doc->lexer; - Node *node; - - lexer->insert = NULL; /* defer implicit inline start tags */ -#if defined(ENABLE_DEBUG_LOG) - in_parse_select++; - SPRTF("Entering ParseSelect %d...\n",in_parse_select); -#endif - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == field->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - field->closed = yes; - TrimSpaces(doc, field); -#if defined(ENABLE_DEBUG_LOG) - in_parse_select--; - SPRTF("Exit ParseSelect 1 %d...\n",in_parse_select); -#endif - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(field, node)) - continue; - - if ( node->type == StartTag && - ( nodeIsOPTION(node) || - nodeIsOPTGROUP(node) || - nodeIsDATALIST(node) || - nodeIsSCRIPT(node)) - ) - { - TY_(InsertNodeAtEnd)(field, node); - ParseTag(doc, node, IgnoreWhitespace); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - - TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR); -#if defined(ENABLE_DEBUG_LOG) - in_parse_select--; - SPRTF("Exit ParseSelect 2 %d...\n",in_parse_select); -#endif - return NULL; -} - - -/** MARK: TY_(oldParseDataList) - * Parses the `datalist` tag. - */ -void* TY_(oldParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode)) -{ -#if defined(ENABLE_DEBUG_LOG) - static int in_parse_datalist = 0; -#endif - Lexer* lexer = doc->lexer; - Node *node; - - lexer->insert = NULL; /* defer implicit inline start tags */ -#if defined(ENABLE_DEBUG_LOG) - in_parse_datalist++; - SPRTF("Entering ParseDatalist %d...\n",in_parse_datalist); -#endif - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == field->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - field->closed = yes; - TrimSpaces(doc, field); -#if defined(ENABLE_DEBUG_LOG) - in_parse_datalist--; - SPRTF("Exit ParseDatalist 1 %d...\n",in_parse_datalist); -#endif - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(field, node)) - continue; - - if ( node->type == StartTag && - ( nodeIsOPTION(node) || - nodeIsOPTGROUP(node) || - nodeIsDATALIST(node) || - nodeIsSCRIPT(node)) - ) - { - TY_(InsertNodeAtEnd)(field, node); - ParseTag(doc, node, IgnoreWhitespace); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - - TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR); -#if defined(ENABLE_DEBUG_LOG) - in_parse_datalist--; - SPRTF("Exit ParseDatalist 2 %d...\n",in_parse_datalist); -#endif - return NULL; -} - - -/** MARK: TY_(oldParseText) - * Parses the `option` and `textarea` tags. - */ -void* TY_(oldParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode) -{ - Lexer* lexer = doc->lexer; - Node *node; - - lexer->insert = NULL; /* defer implicit inline start tags */ - - if ( nodeIsTEXTAREA(field) ) - mode = Preformatted; - else - mode = MixedContent; /* kludge for font tags */ - - while ((node = TY_(GetToken)(doc, mode)) != NULL) - { - if (node->tag == field->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - field->closed = yes; - TrimSpaces(doc, field); - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(field, node)) - continue; - - if (TY_(nodeIsText)(node)) - { - /* only called for 1st child */ - if (field->content == NULL && !(mode & Preformatted)) - TrimSpaces(doc, field); - - if (node->start >= node->end) - { - TY_(FreeNode)( doc, node); - continue; - } - - TY_(InsertNodeAtEnd)(field, node); - continue; - } - - /* for textarea should all cases of < and & be escaped? */ - - /* discard inline tags e.g. font */ - if ( node->tag - && node->tag->model & CM_INLINE - && !(node->tag->model & CM_FIELD)) /* #487283 - fix by Lee Passey 25 Jan 02 */ - { - TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* terminate element on other tags */ - if (!(field->tag->model & CM_OPT)) - TY_(Report)(doc, field, node, MISSING_ENDTAG_BEFORE); - - TY_(UngetToken)( doc ); - TrimSpaces(doc, field); - return NULL; - } - - if (!(field->tag->model & CM_OPT)) - TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR); - return NULL; -} - - -/** MARK: TY_(oldParseTitle) - * Parses the `title` tag. - */ -void* TY_(oldParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode)) -{ - Node *node; - while ((node = TY_(GetToken)(doc, MixedContent)) != NULL) - { - if (node->tag == title->tag && node->type == StartTag - && cfgBool(doc, TidyCoerceEndTags) ) - { - TY_(Report)(doc, title, node, COERCE_TO_ENDTAG); - node->type = EndTag; - TY_(UngetToken)( doc ); - continue; - } - else if (node->tag == title->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - title->closed = yes; - TrimSpaces(doc, title); - return NULL; - } - - if (TY_(nodeIsText)(node)) - { - /* only called for 1st child */ - if (title->content == NULL) - TrimInitialSpace(doc, title, node); - - if (node->start >= node->end) - { - TY_(FreeNode)( doc, node); - continue; - } - - TY_(InsertNodeAtEnd)(title, node); - continue; - } - - /* deal with comments etc. */ - if (InsertMisc(title, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL) - { - TY_(Report)(doc, title, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* pushback unexpected tokens */ - TY_(Report)(doc, title, node, MISSING_ENDTAG_BEFORE); - TY_(UngetToken)( doc ); - TrimSpaces(doc, title); - return NULL; - } - - TY_(Report)(doc, title, node, MISSING_ENDTAG_FOR); - return NULL; -} - - -/** MARK: TY_(oldParseScript) - * Parses the `script` tag. - * - * @todo This isn't quite right for CDATA content as it recognises tags - * within the content and parses them accordingly. This will unfortunately - * screw up scripts which include: - * < + letter - * < + ! - * < + ? - * < + / + letter - */ -void* TY_(oldParseScript)(TidyDocImpl* doc, Node *script, GetTokenMode ARG_UNUSED(mode)) -{ - Node *node; - - doc->lexer->parent = script; - node = TY_(GetToken)(doc, CdataContent); - doc->lexer->parent = NULL; - - if (node) - { - TY_(InsertNodeAtEnd)(script, node); - } - else - { - /* handle e.g. a document like "