From 91f29ea7b88a0f3a810d011f958ea9dd935bd65b Mon Sep 17 00:00:00 2001 From: Jim Derry Date: Thu, 5 Aug 2021 08:18:30 -0400 Subject: [PATCH 1/2] HTML Tidy now parses HTML non-recursively. Instead of recursive calls for each nested level of HTML, the next level is pushed to a stack on the heap, and returned to the main loop. This prevents stack overflow at _n_ depth (where _n_ is operating-system dependent). It's probably still possible to use all of the heap memory, but Tidy's allocators already fail gracefully in this circumstance. Please report any regressions of your own HTML! NOTE: the XML parser is not affected, and is probably still highly recursive. --- .../cases/dev-cases/case-001.conf | 4 + .../cases/dev-cases/case-001@0.html | 26 + .../cases/dev-cases/case-002.conf | 4 + .../cases/dev-cases/case-002@1.html | 33 + .../cases/dev-cases/case-003.conf | 4 + .../cases/dev-cases/case-003@1.html | 27 + .../cases/dev-cases/case-004.conf | 4 + .../cases/dev-cases/case-004@1.html | 41 + .../cases/dev-expects/case-001.html | 41 + .../cases/dev-expects/case-001.txt | 14 + .../cases/dev-expects/case-002.html | 39 + .../cases/dev-expects/case-002.txt | 16 + .../cases/dev-expects/case-003.html | 30 + .../cases/dev-expects/case-003.txt | 26 + .../cases/dev-expects/case-004.html | 61 + .../cases/dev-expects/case-004.txt | 14 + .../cases/special-cases/README.txt | 15 + .../cases/special-cases/case-evil.conf | 4 + .../cases/special-cases/case-evil@1.html | 6 + src/parser.c | 7482 ++++++++--------- src/parser.h | 33 +- src/tags.h | 2 +- 22 files changed, 3890 insertions(+), 4036 deletions(-) create mode 100755 regression_testing/cases/dev-cases/case-001.conf create mode 100755 regression_testing/cases/dev-cases/case-001@0.html create mode 100755 regression_testing/cases/dev-cases/case-002.conf create mode 100755 regression_testing/cases/dev-cases/case-002@1.html create mode 100755 regression_testing/cases/dev-cases/case-003.conf create mode 100644 
regression_testing/cases/dev-cases/case-003@1.html create mode 100755 regression_testing/cases/dev-cases/case-004.conf create mode 100644 regression_testing/cases/dev-cases/case-004@1.html create mode 100644 regression_testing/cases/dev-expects/case-001.html create mode 100644 regression_testing/cases/dev-expects/case-001.txt create mode 100644 regression_testing/cases/dev-expects/case-002.html create mode 100644 regression_testing/cases/dev-expects/case-002.txt create mode 100644 regression_testing/cases/dev-expects/case-003.html create mode 100644 regression_testing/cases/dev-expects/case-003.txt create mode 100644 regression_testing/cases/dev-expects/case-004.html create mode 100644 regression_testing/cases/dev-expects/case-004.txt create mode 100644 regression_testing/cases/special-cases/README.txt create mode 100755 regression_testing/cases/special-cases/case-evil.conf create mode 100644 regression_testing/cases/special-cases/case-evil@1.html diff --git a/regression_testing/cases/dev-cases/case-001.conf b/regression_testing/cases/dev-cases/case-001.conf new file mode 100755 index 0000000..85723a4 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-001.conf @@ -0,0 +1,4 @@ +# Config for test case. +tidy-mark: no +indent: yes +wrap: 999 diff --git a/regression_testing/cases/dev-cases/case-001@0.html b/regression_testing/cases/dev-cases/case-001@0.html new file mode 100755 index 0000000..dd9ab09 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-001@0.html @@ -0,0 +1,26 @@ + + + + + This is a title + + + +
+

This is the first paragraph.

+

Now now, second paragraph?

+
+

I'm nested in a div.

+
    +
  • List item one. +
  • List item two. There isn't a third. Hahaha.
  • +
+

Because, you know, lists should have a minimum of three items.

+
+

Penultimate paragraphs are sometimes the best.

+
+

Don't Cray; Buy Amiga!

+ + diff --git a/regression_testing/cases/dev-cases/case-002.conf b/regression_testing/cases/dev-cases/case-002.conf new file mode 100755 index 0000000..85723a4 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-002.conf @@ -0,0 +1,4 @@ +# Config for test case. +tidy-mark: no +indent: yes +wrap: 999 diff --git a/regression_testing/cases/dev-cases/case-002@1.html b/regression_testing/cases/dev-cases/case-002@1.html new file mode 100755 index 0000000..180f995 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-002@1.html @@ -0,0 +1,33 @@ + + + + + This is a title + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/regression_testing/cases/dev-expects/case-001.html b/regression_testing/cases/dev-expects/case-001.html new file mode 100644 index 0000000..472b95b --- /dev/null +++ b/regression_testing/cases/dev-expects/case-001.html @@ -0,0 +1,41 @@ + + + + + + This is a title + + + +
+

+ This is the first paragraph. +

+

+ Now now, second paragraph? +

+
+

+ I'm nested in a div. +

+
    +
  • List item one. +
  • +
  • List item two. There isn't a third. Hahaha. +
  • +
+

+ Because, you know, lists should have a minimum of three items. +

+
+

+ Penultimate paragraphs are sometimes the best. +

+
+

+ Don't Cray; Buy Amiga! +

+ + diff --git a/regression_testing/cases/dev-expects/case-001.txt b/regression_testing/cases/dev-expects/case-001.txt new file mode 100644 index 0000000..c32fbc8 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-001.txt @@ -0,0 +1,14 @@ +line 17 column 13 - Info: missing optional end tag +Info: Document content looks like HTML5 +No warnings or errors were found. + +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: https://html.spec.whatwg.org/multipage/ +Validate your HTML documents: https://validator.w3.org/nu/ +Lobby your company to join the W3C: https://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/dev-expects/case-002.html b/regression_testing/cases/dev-expects/case-002.html new file mode 100644 index 0000000..2e1f028 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-002.html @@ -0,0 +1,39 @@ + + + + + + This is a title + + + + + + + + + + + + + + + + + + + diff --git a/regression_testing/cases/dev-expects/case-002.txt b/regression_testing/cases/dev-expects/case-002.txt new file mode 100644 index 0000000..e79f1ec --- /dev/null +++ b/regression_testing/cases/dev-expects/case-002.txt @@ -0,0 +1,16 @@ +line 32 column 1 - Warning: discarding unexpected +line 33 column 1 - Warning: discarding unexpected +line 25 column 5 - Warning: missing
+Info: Document content looks like HTML5 +Tidy found 3 warnings and 0 errors! + +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: https://html.spec.whatwg.org/multipage/ +Validate your HTML documents: https://validator.w3.org/nu/ +Lobby your company to join the W3C: https://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/dev-expects/case-003.html b/regression_testing/cases/dev-expects/case-003.html new file mode 100644 index 0000000..af8bcb5 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-003.html @@ -0,0 +1,30 @@ + + + + + + case-003 + + + +
+
+
+ + + + +
+ What is up? +
+
+
+
+
+
+ Hello +
+ + diff --git a/regression_testing/cases/dev-expects/case-003.txt b/regression_testing/cases/dev-expects/case-003.txt new file mode 100644 index 0000000..f0a93ab --- /dev/null +++ b/regression_testing/cases/dev-expects/case-003.txt @@ -0,0 +1,26 @@ +line 14 column 7 - Warning:
isn't allowed in elements +line 13 column 5 - Info: previously mentioned +line 14 column 7 - Warning: missing
before +line 10 column 3 - Info: missing optional end tag +line 12 column 5 - Warning: The summary attribute on the element is obsolete in HTML5 +line 14 column 7 - Warning: trimming empty
+line 21 column 3 - Warning:
element removed from HTML5 +line 12 column 5 - Warning:
attribute "summary" not allowed for HTML5 +Info: Document content looks like HTML5 +Tidy found 6 warnings and 0 errors! + +One or more empty elements were present in the source document but +dropped on output. If these elements are necessary or you don't want +this behavior, then consider setting the option "drop-empty-elements" +to no. + +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: https://html.spec.whatwg.org/multipage/ +Validate your HTML documents: https://validator.w3.org/nu/ +Lobby your company to join the W3C: https://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/dev-expects/case-004.html b/regression_testing/cases/dev-expects/case-004.html new file mode 100644 index 0000000..34d97df --- /dev/null +++ b/regression_testing/cases/dev-expects/case-004.html @@ -0,0 +1,61 @@ + + + + + + case-004 + + + + + + + + + + + + + + + + + + diff --git a/regression_testing/cases/dev-expects/case-004.txt b/regression_testing/cases/dev-expects/case-004.txt new file mode 100644 index 0000000..968d986 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-004.txt @@ -0,0 +1,14 @@ +line 30 column 5 - Warning: can't be nested +Info: Document content looks like HTML5 +Tidy found 1 warning and 0 errors! 
+ +About HTML Tidy: https://github.com/htacg/tidy-html5 +Bug reports and comments: https://github.com/htacg/tidy-html5/issues +Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ +Latest HTML specification: https://html.spec.whatwg.org/multipage/ +Validate your HTML documents: https://validator.w3.org/nu/ +Lobby your company to join the W3C: https://www.w3.org/Consortium + +Do you speak a language other than English, or a different variant of +English? Consider helping us to localize HTML Tidy. For details please see +https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/special-cases/README.txt b/regression_testing/cases/special-cases/README.txt new file mode 100644 index 0000000..bc7ceb9 --- /dev/null +++ b/regression_testing/cases/special-cases/README.txt @@ -0,0 +1,15 @@ +About this test suite: +====================== + +These test files represent the standard regression testing that must be +performed prior to committing changes to Tidy's code. In some circumstances +results are platform specific and these notices will be displayed in the +testing results. + +This test set is intended to collect tests that we **don't** want to run +automatically, either because they take a long time to complete, or their +expectations files are much too large for keeping the repository a reasonable +size. + +Because there's no matching -expects directory, these cases will NOT be included +in the default testing service. diff --git a/regression_testing/cases/special-cases/case-evil.conf b/regression_testing/cases/special-cases/case-evil.conf new file mode 100755 index 0000000..85723a4 --- /dev/null +++ b/regression_testing/cases/special-cases/case-evil.conf @@ -0,0 +1,4 @@ +# Config for test case. 
+tidy-mark: no +indent: yes +wrap: 999 diff --git a/regression_testing/cases/special-cases/case-evil@1.html b/regression_testing/cases/special-cases/case-evil@1.html new file mode 100644 index 0000000..c7540b8 --- /dev/null +++ b/regression_testing/cases/special-cases/case-evil@1.html @@ -0,0 +1,6 @@ + + + diff --git a/src/parser.c b/src/parser.c index 69befb9..0662779 100644 --- a/src/parser.c +++ b/src/parser.c @@ -15,14 +15,6 @@ #include "sprtf.h" -/****************************************************************************//* - ** MARK: - Forward Declarations - ***************************************************************************/ - - -static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ); - - /****************************************************************************//* ** MARK: - Configuration Options ***************************************************************************/ @@ -42,33 +34,43 @@ static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ); /** - * Insert "node" into markup tree in place of "element" - * which is moved to become the child of the node + * Generalised search for duplicate elements. + * Issue #166 - repeated
element. */ -static void InsertNodeAsParent(Node *element, Node *node) +static Bool findNodeWithId( Node *node, TidyTagId tid ) { - node->content = element; - node->last = element; - node->parent = element->parent; - element->parent = node; + Node *content; + while (node) + { + if (TagIsId(node,tid)) + return yes; + /*\ + * Issue #459 - Under certain circumstances, with many node this use of + * 'for (content = node->content; content; content = content->content)' + * would produce a **forever** circle, or at least a very extended loop... + * It is sufficient to test the content, if it exists, + * to quickly iterate all nodes. Now all nodes are tested only once. + \*/ + content = node->content; + if (content) + { + if ( findNodeWithId(content,tid) ) + return yes; + } + node = node->next; + } + return no; +} - if (node->parent->content == element) - node->parent->content = node; - if (node->parent->last == element) - node->parent->last = node; - - node->prev = element->prev; - element->prev = NULL; - - if (node->prev) - node->prev->next = node; - - node->next = element->next; - element->next = NULL; - - if (node->next) - node->next->prev = node; +/** + * Perform a global search for an element. + * Issue #166 - repeated
element + */ +static Bool findNodeById( TidyDocImpl* doc, TidyTagId tid ) +{ + Node *node = (doc ? doc->root.content : NULL); + return findNodeWithId( node,tid ); } @@ -120,46 +122,33 @@ static Bool InsertMisc(Node *element, Node *node) /** - * Move node to the head, where element is used as starting - * point in hunt for head. normally called during parsing. + * Insert "node" into markup tree in place of "element" + * which is moved to become the child of the node */ -static void MoveToHead( TidyDocImpl* doc, Node *element, Node *node ) +static void InsertNodeAsParent(Node *element, Node *node) { - Node *head; + node->content = element; + node->last = element; + node->parent = element->parent; + element->parent = node; - TY_(RemoveNode)( node ); /* make sure that node is isolated */ + if (node->parent->content == element) + node->parent->content = node; - if ( TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN ); + if (node->parent->last == element) + node->parent->last = node; - head = TY_(FindHEAD)(doc); - assert(head != NULL); + node->prev = element->prev; + element->prev = NULL; - TY_(InsertNodeAtEnd)(head, node); + if (node->prev) + node->prev->next = node; - if ( node->tag->parser ) - ParseTag( doc, node, IgnoreWhitespace ); - } - else - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - } -} + node->next = element->next; + element->next = NULL; - -/** - * Moves given node to end of body element. - */ -static void MoveNodeToBody( TidyDocImpl* doc, Node* node ) -{ - Node* body = TY_(FindBody)( doc ); - if ( body ) - { - TY_(RemoveNode)( node ); - TY_(InsertNodeAtEnd)( body, node ); - } + if (node->next) + node->next->prev = node; } @@ -188,43 +177,55 @@ static void MoveBeforeTable( TidyDocImpl* ARG_UNUSED(doc), Node *row, /** - * Generalised search for duplicate elements. - * Issue #166 - repeated
element. + * Moves given node to end of body element. */ -static Bool findNodeWithId( Node *node, TidyTagId tid ) +static void MoveNodeToBody( TidyDocImpl* doc, Node* node ) { - Node *content; - while (node) + Node* body = TY_(FindBody)( doc ); + if ( body ) { - if (TagIsId(node,tid)) - return yes; - /*\ - * Issue #459 - Under certain circumstances, with many node this use of - * 'for (content = node->content; content; content = content->content)' - * would produce a **forever** circle, or at least a very extended loop... - * It is sufficient to test the content, if it exists, - * to quickly iterate all nodes. Now all nodes are tested only once. - \*/ - content = node->content; - if (content) - { - if ( findNodeWithId(content,tid) ) - return yes; - } - node = node->next; + TY_(RemoveNode)( node ); + TY_(InsertNodeAtEnd)( body, node ); } - return no; } /** - * Perform a global search for an element. - * Issue #166 - repeated
element + * Move node to the head, where element is used as starting + * point in hunt for head. Normally called during parsing. */ -static Bool findNodeById( TidyDocImpl* doc, TidyTagId tid ) +static void MoveToHead( TidyDocImpl* doc, Node *element, Node *node ) { - Node *node = (doc ? doc->root.content : NULL); - return findNodeWithId( node,tid ); + Node *head = NULL; + + TY_(RemoveNode)( node ); /* make sure that node is isolated */ + + if ( TY_(nodeIsElement)(node) ) + { + TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN ); + + head = TY_(FindHEAD)(doc); + assert(head != NULL); + + TY_(InsertNodeAtEnd)(head, node); + + if ( node->tag->parser ) + { + /* Only one of the existing test cases as of 2021-08-14 invoke + MoveToHead, and it doesn't go deeper than one level. The + parser() call is supposed to return a node if additional + parsing is needed. Keep this in mind if we start to get bug + reports. + */ + Parser* parser = node->tag->parser; + parser( doc, node, IgnoreWhitespace ); + } + } + else + { + TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); + TY_(FreeNode)( doc, node ); + } } @@ -359,6 +360,73 @@ static Bool IsPreDescendant(Node* node) } +/** + * Indicates whether or not the only content model for the given node + * is CM_INLINE. + */ +static Bool nodeCMIsOnlyInline( Node* node ) +{ + return TY_(nodeHasCM)( node, CM_INLINE ) && !TY_(nodeHasCM)( node, CM_BLOCK ); +} + + +/** + * Indicates whether or not the content of the given node is acceptable + * content for pre elements + */ +static Bool PreContent( TidyDocImpl* ARG_UNUSED(doc), Node* node ) +{ + /* p is coerced to br's, Text OK too */ + if ( nodeIsP(node) || TY_(nodeIsText)(node) ) + return yes; + + if ( node->tag == NULL || + nodeIsPARAM(node) || + !TY_(nodeHasCM)(node, CM_INLINE|CM_NEW) ) + return no; + + return yes; +} + + +/** + * Indicates whether or not leading whitespace should be cleaned. 
+ */ +static Bool CleanLeadingWhitespace(TidyDocImpl* ARG_UNUSED(doc), Node* node) +{ + if (!TY_(nodeIsText)(node)) + return no; + + if (node->parent->type == DocTypeTag) + return no; + + if (IsPreDescendant(node)) + return no; + + if (node->parent->tag && node->parent->tag->parser == TY_(ParseScript)) + return no; + + /*

...
......

*/ + if (nodeIsBR(node->prev)) + return yes; + + /*

...

*/ + if (node->prev == NULL && !TY_(nodeHasCM)(node->parent, CM_INLINE)) + return yes; + + /*

...

... */ + if (node->prev && !TY_(nodeHasCM)(node->prev, CM_INLINE) && + TY_(nodeIsElement)(node->prev)) + return yes; + + /*

...

*/ + if (!node->prev && !node->parent->prev && !TY_(nodeHasCM)(node->parent->parent, CM_INLINE)) + return yes; + + return no; +} + + /** * Indicates whether or not trailing whitespace should be cleaned. */ @@ -414,73 +482,6 @@ static Bool CleanTrailingWhitespace(TidyDocImpl* doc, Node* node) } -/** - * Indicates whether or not leading whitespace should be cleaned. - */ -static Bool CleanLeadingWhitespace(TidyDocImpl* ARG_UNUSED(doc), Node* node) -{ - if (!TY_(nodeIsText)(node)) - return no; - - if (node->parent->type == DocTypeTag) - return no; - - if (IsPreDescendant(node)) - return no; - - if (node->parent->tag && node->parent->tag->parser == TY_(ParseScript)) - return no; - - /*

...
......

*/ - if (nodeIsBR(node->prev)) - return yes; - - /*

...

*/ - if (node->prev == NULL && !TY_(nodeHasCM)(node->parent, CM_INLINE)) - return yes; - - /*

...

... */ - if (node->prev && !TY_(nodeHasCM)(node->prev, CM_INLINE) && - TY_(nodeIsElement)(node->prev)) - return yes; - - /*

...

*/ - if (!node->prev && !node->parent->prev && !TY_(nodeHasCM)(node->parent->parent, CM_INLINE)) - return yes; - - return no; -} - - -/** - * Indicates whether or not the content of the given node is acceptable - * content for pre elements - */ -static Bool PreContent( TidyDocImpl* ARG_UNUSED(doc), Node* node ) -{ - /* p is coerced to br's, Text OK too */ - if ( nodeIsP(node) || TY_(nodeIsText)(node) ) - return yes; - - if ( node->tag == NULL || - nodeIsPARAM(node) || - !TY_(nodeHasCM)(node, CM_INLINE|CM_NEW) ) - return no; - - return yes; -} - - -/** - * Indicates whether or not the only content model for the given node - * is CM_INLINE. - */ -static Bool nodeCMIsOnlyInline( Node* node ) -{ - return TY_(nodeHasCM)( node, CM_INLINE ) && !TY_(nodeHasCM)( node, CM_BLOCK ); -} - - /***************************************************************************//* ** MARK: - Information Accumulation ***************************************************************************/ @@ -504,84 +505,20 @@ static void BadForm( TidyDocImpl* doc ) /** - * This maps - * hello world - * to - * hello world - * - * If last child of element is a text node - * then trim trailing white space character - * moving it to after element's end tag. + * Adds style information as a class in the document or a property + * of the node to prevent indentation of inferred UL tags. */ -static void TrimTrailingSpace( TidyDocImpl* doc, Node *element, Node *last ) +static void AddClassNoIndent( TidyDocImpl* doc, Node *node ) { - Lexer* lexer = doc->lexer; - byte c; - - if (TY_(nodeIsText)(last)) - { - if (last->end > last->start) - { - c = (byte) lexer->lexbuf[ last->end - 1 ]; - - if ( c == ' ' ) - { - last->end -= 1; - if ( (element->tag->model & CM_INLINE) && - !(element->tag->model & CM_FIELD) ) - lexer->insertspace = yes; - } - } - } -} - - -/** - * This maps - *

hello world - * to - *

hello world - * - * Trims initial space, by moving it before the - * start tag, or if this element is the first in - * parent's content, then by discarding the space - */ -static void TrimInitialSpace( TidyDocImpl* doc, Node *element, Node *text ) -{ - Lexer* lexer = doc->lexer; - Node *prev, *node; - - if ( TY_(nodeIsText)(text) && - lexer->lexbuf[text->start] == ' ' && - text->start < text->end ) - { - if ( (element->tag->model & CM_INLINE) && - !(element->tag->model & CM_FIELD) ) - { - prev = element->prev; - - if (TY_(nodeIsText)(prev)) - { - if (prev->end == 0 || lexer->lexbuf[prev->end - 1] != ' ') - lexer->lexbuf[(prev->end)++] = ' '; - - ++(element->start); - } - else /* create new node */ - { - node = TY_(NewNode)(lexer->allocator, lexer); - node->start = (element->start)++; - node->end = element->start; - lexer->lexbuf[node->start] = ' '; - TY_(InsertNodeBeforeElement)(element ,node); - DEBUG_LOG(SPRTF("TrimInitialSpace: Created text node, inserted before <%s>\n", - (element->element ? element->element : "unknown"))); - } - } - - /* discard the space in current node */ - ++(text->start); - } + ctmbstr sprop = + "padding-left: 2ex; margin-left: 0ex" + "; margin-top: 0ex; margin-bottom: 0ex"; + if ( !cfgBool(doc, TidyDecorateInferredUL) ) + return; + if ( cfgBool(doc, TidyMakeClean) ) + TY_(AddStyleAsClass)( doc, node, sprop ); + else + TY_(AddStyleProperty)( doc, node, sprop ); } @@ -628,30 +565,20 @@ static void CleanSpaces(TidyDocImpl* doc, Node* node) /** - * Move initial and trailing space out. - * This routine maps: - * hello world - * to - * hello world - * and - * hello world - * to - * hello world + * If a table row is empty then insert an empty cell. This practice is + * consistent with browser behavior and avoids potential problems with + * row spanning cells. 
*/ -static void TrimSpaces( TidyDocImpl* doc, Node *element) +static void FixEmptyRow(TidyDocImpl* doc, Node *row) { - Node* text = element->content; + Node *cell; - if (nodeIsPRE(element) || IsPreDescendant(element)) - return; - - if (TY_(nodeIsText)(text)) - TrimInitialSpace(doc, element, text); - - text = element->last; - - if (TY_(nodeIsText)(text)) - TrimTrailingSpace(doc, element, text); + if (row->content == NULL) + { + cell = TY_(InferredTag)(doc, TidyTag_TD); + TY_(InsertNodeAtEnd)(row, cell); + TY_(Report)(doc, row, cell, MISSING_STARTTAG); + } } @@ -677,43 +604,116 @@ static void InsertDocType( TidyDocImpl* doc, Node *element, Node *doctype ) } - /** - * Adds style information as a class in the document or a property - * of the node to prevent indentation of inferred UL tags. + * This maps + *

hello world + * to + *

hello world + * + * Trims initial space, by moving it before the + * start tag, or if this element is the first in + * parent's content, then by discarding the space */ -static void AddClassNoIndent( TidyDocImpl* doc, Node *node ) +static void TrimInitialSpace( TidyDocImpl* doc, Node *element, Node *text ) { - ctmbstr sprop = - "padding-left: 2ex; margin-left: 0ex" - "; margin-top: 0ex; margin-bottom: 0ex"; - if ( !cfgBool(doc, TidyDecorateInferredUL) ) - return; - if ( cfgBool(doc, TidyMakeClean) ) - TY_(AddStyleAsClass)( doc, node, sprop ); - else - TY_(AddStyleProperty)( doc, node, sprop ); + Lexer* lexer = doc->lexer; + Node *prev, *node; + + if ( TY_(nodeIsText)(text) && + lexer->lexbuf[text->start] == ' ' && + text->start < text->end ) + { + if ( (element->tag->model & CM_INLINE) && + !(element->tag->model & CM_FIELD) ) + { + prev = element->prev; + + if (TY_(nodeIsText)(prev)) + { + if (prev->end == 0 || lexer->lexbuf[prev->end - 1] != ' ') + lexer->lexbuf[(prev->end)++] = ' '; + + ++(element->start); + } + else /* create new node */ + { + node = TY_(NewNode)(lexer->allocator, lexer); + node->start = (element->start)++; + node->end = element->start; + lexer->lexbuf[node->start] = ' '; + TY_(InsertNodeBeforeElement)(element ,node); + DEBUG_LOG(SPRTF("TrimInitialSpace: Created text node, inserted before <%s>\n", + (element->element ? element->element : "unknown"))); + } + } + + /* discard the space in current node */ + ++(text->start); + } } /** - * If a table row is empty then insert an empty cell. This practice is - * consistent with browser behavior and avoids potential problems with - * row spanning cells. + * This maps + * hello world + * to + * hello world + * + * If last child of element is a text node + * then trim trailing white space character + * moving it to after element's end tag. 
*/ -static void FixEmptyRow(TidyDocImpl* doc, Node *row) +static void TrimTrailingSpace( TidyDocImpl* doc, Node *element, Node *last ) { - Node *cell; + Lexer* lexer = doc->lexer; + byte c; - if (row->content == NULL) + if (TY_(nodeIsText)(last)) { - cell = TY_(InferredTag)(doc, TidyTag_TD); - TY_(InsertNodeAtEnd)(row, cell); - TY_(Report)(doc, row, cell, MISSING_STARTTAG); + if (last->end > last->start) + { + c = (byte) lexer->lexbuf[ last->end - 1 ]; + + if ( c == ' ' ) + { + last->end -= 1; + if ( (element->tag->model & CM_INLINE) && + !(element->tag->model & CM_FIELD) ) + lexer->insertspace = yes; + } + } } } +/** + * Move initial and trailing space out. + * This routine maps: + * hello world + * to + * hello world + * and + * hello world + * to + * hello world + */ +static void TrimSpaces( TidyDocImpl* doc, Node *element) +{ + Node* text = element->content; + + if (nodeIsPRE(element) || IsPreDescendant(element)) + return; + + if (TY_(nodeIsText)(text)) + TrimInitialSpace(doc, element, text); + + text = element->last; + + if (TY_(nodeIsText)(text)) + TrimTrailingSpace(doc, element, text); +} + + /***************************************************************************//* ** MARK: - Parsers Support ***************************************************************************/ @@ -817,13 +817,12 @@ static Bool FindLastLI( Node *list, Node **lastli ) */ void TY_(InitParserStack)( TidyDocImpl* doc ) { - uint default_size = 16; + enum { default_size = 32 }; TidyParserMemory *content = (TidyParserMemory *) TidyAlloc( doc->allocator, sizeof(TidyParserMemory) * default_size ); doc->stack.content = content; doc->stack.size = default_size; doc->stack.top = -1; - doc->stack.allocator = doc->allocator; } @@ -832,7 +831,7 @@ void TY_(InitParserStack)( TidyDocImpl* doc ) */ void TY_(FreeParserStack)( TidyDocImpl* doc ) { - TidyFree( doc->stack.allocator, doc->stack.content ); + TidyFree( doc->allocator, doc->stack.content ); doc->stack.content = NULL; doc->stack.size = 0; 
@@ -842,15 +841,14 @@ void TY_(FreeParserStack)( TidyDocImpl* doc ) /** * Increase the stack size. - * TODO: don't overflow max_uint. Need a message when we can no longer increase the size beyond 429 million depth. */ static void growParserStack( TidyDocImpl* doc ) { TidyParserMemory *content; - content = (TidyParserMemory *) TidyAlloc( doc->stack.allocator, sizeof(TidyParserMemory) * doc->stack.size * 2 ); + content = (TidyParserMemory *) TidyAlloc( doc->allocator, sizeof(TidyParserMemory) * doc->stack.size * 2 ); memcpy( content, doc->stack.content, sizeof(TidyParserMemory) * (doc->stack.top + 1) ); - TidyFree(doc->stack.allocator, doc->stack.content); + TidyFree(doc->allocator, doc->stack.content); doc->stack.content = content; doc->stack.size = doc->stack.size * 2; @@ -860,12 +858,58 @@ static void growParserStack( TidyDocImpl* doc ) /** * Indicates whether or not the stack is empty. */ -static Bool isEmptyParserStack( TidyDocImpl* doc ) +static inline Bool isEmptyParserStack( TidyDocImpl* doc ) { return doc->stack.top < 0; } +/** + * Peek at the parser memory. + */ +static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc ) +{ + return doc->stack.content[doc->stack.top]; +} + + +/** + * Peek at the parser memory "identity" field. This is just a convenience + * to avoid having to create a new struct instance in the caller. + */ +static inline Parser* peekMemoryIdentity( TidyDocImpl* doc ) +{ + return doc->stack.content[doc->stack.top].identity; +} + + +/** + * Peek at the parser memory "mode" field. This is just a convenience + * to avoid having to create a new struct instance in the caller. + */ +static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc ) +{ + return doc->stack.content[doc->stack.top].mode; +} + + +/** + * Pop out a parser memory. 
+ */ +static TidyParserMemory popMemory( TidyDocImpl* doc ) +{ + if ( !isEmptyParserStack( doc ) ) + { + TidyParserMemory data = doc->stack.content[doc->stack.top]; + DEBUG_LOG(SPRTF("\n<--POP %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top - 1 )); + doc->stack.top = doc->stack.top - 1; + return data; + } + TidyParserMemory blank = { NULL }; + return blank; +} + + /** * Push the parser memory to the stack. */ @@ -875,52 +919,9 @@ static void pushMemory( TidyDocImpl* doc, TidyParserMemory data ) growParserStack( doc ); doc->stack.top++; + doc->stack.content[doc->stack.top] = data; -} - - -/** - * Peek at the parser memory. - */ -static FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc ) -{ - return doc->stack.content[doc->stack.top]; -} - - -/** - * Peek at the parser memory "mode" field. This is just a convenience - * to avoid having to create a new struct instance in the caller. - */ -static GetTokenMode peekMemoryMode( TidyDocImpl* doc ) -{ - return doc->stack.content[doc->stack.top].mode; -} - - -/** - * Peek at the parser memory "identity" field. This is just a convenience - * to avoid having to create a new struct instance in the caller. - */ -static Parser* peekMemoryIdentity( TidyDocImpl* doc ) -{ - return doc->stack.content[doc->stack.top].identity; -} - - -/** - * Pop out a parser memory. - */ -static TidyParserMemory popMemory( TidyDocImpl* doc ) -{ - if ( !isEmptyParserStack( doc ) ) - { - TidyParserMemory data = doc->stack.content[doc->stack.top]; - doc->stack.top = doc->stack.top - 1; - return data; - } - TidyParserMemory blank = { NULL }; - return blank; + DEBUG_LOG(SPRTF("\n-->PUSH %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? 
data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top )); } @@ -938,7 +939,7 @@ static Parser* GetParserForNode( TidyDocImpl* doc, Node *node ) Lexer* lexer = doc->lexer; /* [i_a]2 prevent crash for active content (php, asp) docs */ - if (node->tag == NULL) + if (!node || node->tag == NULL) return NULL; /* @@ -968,28 +969,16 @@ static Parser* GetParserForNode( TidyDocImpl* doc, Node *node ) /** - * Instantiates the correct parser for the given node. This is currently - * maintained ONLY until the legacy parsers have been ported, as this - * introduces recursion when used. - */ -static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ) -{ - Parser* parser = GetParserForNode( doc, node ); - - if ( parser ) - (*parser)( doc, node, mode, no ); -} - - -/** - * The main parser body will populate the document's document root starting - * with the provided node, which generally should be the HTML node after the - * pre-HTML stuff is handled at a higher level. + * This parser controller initiates the parsing process with the document's + * root starting with the provided node, which should be the HTML node after + * the pre-HTML stuff is handled at a higher level. * - * This parser works cooperatively with compliant parsers to pass state - * information back and forth in the TidyDocImpl's `stack`, which resides on - * the heap and prevents recursion and stack exhaustion, and also works well - * with the old-style parsers that do recurse. + * This controller is responsible for calling each of the individual parsers, + * based on the tokens it pulls from the lexer, or the tokens passed back via + * the parserMemory stack from each of the parsers. Having a main, central + * looping dispatcher in this fashion allows the prevention of recursion. Note, + * though, that some of the parsers are still recursive and have to be + * refactored in order to cooperate with this controller. 
* * (The goal is to update the old-style parsers slowly and deliberately * without causing regressions, in a series of smaller commits and updates.) @@ -997,76 +986,78 @@ static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ) void ParseHTMLWithNode( TidyDocImpl* doc, Node* node ) { GetTokenMode mode = IgnoreWhitespace; - Parser* parser = NULL; + Parser* parser = GetParserForNode( doc, node ); + Bool something_to_do = yes; /* This main loop is only extinguished when all of the parser tokens are - consumed. Note that most of the parsers consume tokens as well, and - so what we're really doing here is managing parsers and preventing - recursion with cooperating parsers. + consumed. Ideally, EVERY parser will return nodes to this loop for + dispatch to the appropriate parser, but some of the recursive parsers + still consume some tokens on their own. */ - while ( node ) + while (something_to_do) { - if ( (parser = GetParserForNode( doc, node )) ) + node = parser ? parser( doc, node, mode ) : NULL; + + /* + We have a node, so anything deferred was already pushed to the stack + to be dealt with later. + */ + if ( node ) { - if ( (node = parser( doc, node, mode, no )) ) - { - /* - When a parser returns a node, it means that we have - to continue the loop rather than moving on, because it - indicates that the parser encountered a token it does not - handle. It also tells us the correct GetTokenMode to use - for it via the struct that it pushed: - */ - mode = peekMemoryMode( doc ); - continue; - } + parser = GetParserForNode( doc, node ); + continue; } /* - If we've come this far, the parser has bottomed out, and won't be - going any deeper. Now we run back up the stack to close all of the - open elements and handle any parser post-processing that was needed. - Of course, other nodes might cause us to deepen the stack again, too. + We weren't given a node, which means this particular leaf is bottomed + out. 
We'll re-enter the parsers using information from the stack. */ - if ( !isEmptyParserStack( doc ) ) + if ( !isEmptyParserStack(doc)) { - if ( (parser = peekMemoryIdentity( doc )) ) + parser = peekMemoryIdentity(doc); + if (parser) { - if ( (node = parser( doc, NULL, 0, yes )) ) - { - /* Another assignment from the parser. */ - mode = peekMemoryMode( doc ); - continue; - } - } else { - /* - There's no identity in the stack (it was used to pass back - a GetToken mode, and nothing else, so remove discard it. - */ + continue; + } + else + { + /* No parser means we're only passing back a parsing mode. */ + mode = peekMemoryMode( doc ); popMemory( doc ); } } - + /* - Assuming we've gotten this far, there's no more work to do and - so we can draw a nice, fresh token from the lexer. + At this point, there's nothing being returned from parsers, and + nothing on the stack, so we can draw a new node from the lexer. */ - node = TY_(GetToken)( doc, mode ); + node = TY_(GetToken)( doc, mode ); + DEBUG_LOG(SPRTF("---ParseHTMLWithNode got token %s with mode %u.\n", node ? node->element : NULL, mode)); + + if (node) + parser = GetParserForNode( doc, node ); + else + something_to_do = no; } } /***************************************************************************//* - ** MARK: - Old Parsers + ** MARK: - Parsers ***************************************************************************/ -/** MARK: TY_(oldParseBlock) +/** MARK: TY_(ParseBlock) * `element` is a node created by the lexer upon seeing the start tag, or * by the parser when the start tag is inferred + * + * This is a non-recursing parser. It uses the document's parser memory stack + * to send subsequent nodes back to the controller for dispatching to parsers. + * This parser is also re-enterable, so that post-processing can occur after + * such dispatching. 
*/ -void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) +Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) { #if defined(ENABLE_DEBUG_LOG) static int in_parse_block = 0; @@ -1076,63 +1067,74 @@ void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) Node *node; Bool checkstack = yes; uint istackbase = 0; -#if defined(ENABLE_DEBUG_LOG) - in_parse_block++; - parse_block_cnt++; - SPRTF("Entering ParseBlock %d... %d %s\n",in_parse_block,parse_block_cnt, - ((element && element->element) ? element->element : "")); -#endif - - if ( element->tag->model & CM_EMPTY ) { -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlockL 1 %d...\n",in_parse_block); -#endif - return NULL; + + if ( element == NULL ) + { + TidyParserMemory memory = popMemory( doc ); + node = memory.reentry_node; /* Throwaway, because the loop overwrites this immediately. */ + mode = memory.reentry_mode; + element = memory.original_node; + DEBUG_LOG(SPRTF(">>>Re-Enter ParseBlock with %s\n", node->element)); } + else + { + DEBUG_LOG(SPRTF(">>>Entering ParseBlock %d... %d %s\n",++in_parse_block,++parse_block_cnt, + ((element && element->element) ? 
element->element : ""))); - if ( nodeIsFORM(element) && - DescendantOf(element, TidyTag_FORM) ) - TY_(Report)(doc, element, NULL, ILLEGAL_NESTING ); + if ( element->tag->model & CM_EMPTY ) + { + DEBUG_LOG(SPRTF("<<tag->model & CM_OBJECT) + { + istackbase = lexer->istackbase; + lexer->istackbase = lexer->istacksize; + } + + if (!(element->tag->model & CM_MIXED)) + TY_(InlineDup)( doc, NULL ); + + /*\ + * Issue #212 - If it is likely that it may be necessary + * to move a leading space into a text node before this + * element, then keep the mode MixedContent to keep any + * leading space + \*/ + if ( !(element->tag->model & CM_INLINE) || + (element->tag->model & CM_FIELD ) ) + { + mode = IgnoreWhitespace; + } + else if (mode == IgnoreWhitespace) + { + /* Issue #212 - Further fix in case ParseBlock() is called with 'IgnoreWhitespace' + when such a leading space may need to be inserted before this element to + preverve the browser view */ + mode = MixedContent; + } + } /* Re-Entering */ + /* - InlineDup() asks the lexer to insert inline emphasis tags - currently pushed on the istack, but take care to avoid - propagating inline emphasis inside OBJECT or APPLET. - For these elements a fresh inline stack context is created - and disposed of upon reaching the end of the element. - They thus behave like table cells in this respect. 
- */ - if (element->tag->model & CM_OBJECT) - { - istackbase = lexer->istackbase; - lexer->istackbase = lexer->istacksize; - } - - if (!(element->tag->model & CM_MIXED)) - TY_(InlineDup)( doc, NULL ); - - /*\ - * Issue #212 - If it is likely that it may be necessary - * to move a leading space into a text node before this - * element, then keep the mode MixedContent to keep any - * leading space - \*/ - if ( !(element->tag->model & CM_INLINE) || - (element->tag->model & CM_FIELD ) ) - { - mode = IgnoreWhitespace; - } - else if (mode == IgnoreWhitespace) - { - /* Issue #212 - Further fix in case ParseBlock() is called with 'IgnoreWhitespace' - when such a leading space may need to be inserted before this element to - preverve the browser view */ - mode = MixedContent; - } - + Main Loop + */ + while ((node = TY_(GetToken)(doc, mode /*MixedContent*/)) != NULL) { + DEBUG_LOG(SPRTF("---ParseBlock got token %s with mode %u\n", node->element, IgnoreWhitespace)); /* end tag for this element */ if (node->type == EndTag && node->tag && (node->tag == element->tag || element->was == node->tag)) @@ -1149,10 +1151,7 @@ void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) element->closed = yes; TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<istackbase = istackbase; TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 4 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<parent->tag->parser == TY_(ParseList) ) { TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 5 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<parent) ) { TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 6 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<exiled) { -#if defined(ENABLE_DEBUG_LOG) - 
in_parse_block--; - SPRTF("Exit ParseBlock 7 %d...\n",in_parse_block); -#endif + if (lexer->exiled) + { + DEBUG_LOG(SPRTF("<<istackbase = istackbase; TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 8 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<element)); + } + return node; } /* discard unexpected tags */ @@ -1614,2189 +1596,53 @@ void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode) TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_block--; - SPRTF("Exit ParseBlock 10 %d...\n",in_parse_block); -#endif + DEBUG_LOG(SPRTF("<<lexer; - Node *node, *parent; -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline++; - SPRTF("Entering ParseInline %d...\n",in_parse_inline); -#endif - - if (element->tag->model & CM_EMPTY) { -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 1 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* - ParseInline is used for some block level elements like H1 to H6 - For such elements we need to insert inline emphasis tags currently - on the inline stack. For Inline elements, we normally push them - onto the inline stack provided they aren't implicit or OBJECT/APPLET. - This test is carried out in PushInline and PopInline, see istack.c - - InlineDup(...) is not called for elements with a CM_MIXED (inline and - block) content model, e.g. or , otherwise constructs like - -

111222333444555

-

111222333444555

-

111222333444555

- - will get corrupted. - */ - if ((TY_(nodeHasCM)(element, CM_BLOCK) || nodeIsDT(element)) && - !TY_(nodeHasCM)(element, CM_MIXED)) - TY_(InlineDup)(doc, NULL); - else if (TY_(nodeHasCM)(element, CM_INLINE)) - TY_(PushInline)(doc, element); - - if ( nodeIsNOBR(element) ) - doc->badLayout |= USING_NOBR; - else if ( nodeIsFONT(element) ) - doc->badLayout |= USING_FONT; - - /* Inline elements may or may not be within a preformatted element */ - if (mode != Preformatted) - mode = MixedContent; - - while ((node = TY_(GetToken)(doc, mode)) != NULL) - { - /* end tag for current element */ - if (node->tag == element->tag && node->type == EndTag) - { - if (element->tag->model & CM_INLINE) - TY_(PopInline)( doc, node ); - - TY_(FreeNode)( doc, node ); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - - /* - if a font element wraps an anchor and nothing else - then move the font element inside the anchor since - otherwise it won't alter the anchor text color - */ - if ( nodeIsFONT(element) && - element->content && element->content == element->last ) - { - Node *child = element->content; - - if ( nodeIsA(child) ) - { - child->parent = element->parent; - child->next = element->next; - child->prev = element->prev; - - element->next = NULL; - element->prev = NULL; - element->parent = child; - - element->content = child->content; - element->last = child->last; - child->content = element; - - TY_(FixNodeLinks)(child); - TY_(FixNodeLinks)(element); - } - } - - element->closed = yes; - TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 2 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* ... 
map 2nd to if 1st is explicit */ - /* (see additional conditions below) */ - /* otherwise emphasis nesting is probably unintentional */ - /* big, small, sub, sup have cumulative effect to leave them alone */ - if ( node->type == StartTag - && node->tag == element->tag - && TY_(IsPushed)( doc, node ) - && !node->implicit - && !element->implicit - && node->tag && (node->tag->model & CM_INLINE) - && !nodeIsA(node) - && !nodeIsFONT(node) - && !nodeIsBIG(node) - && !nodeIsSMALL(node) - && !nodeIsSUB(node) - && !nodeIsSUP(node) - && !nodeIsQ(node) - && !nodeIsSPAN(node) - && cfgBool(doc, TidyCoerceEndTags) - ) - { - /* proceeds only if "node" does not have any attribute and - follows a text node not finishing with a space */ - if (element->content != NULL && node->attributes == NULL - && TY_(nodeIsText)(element->last) - && !TY_(TextNodeEndWithSpace)(doc->lexer, element->last) ) - { - TY_(Report)(doc, element, node, COERCE_TO_ENDTAG); - node->type = EndTag; - TY_(UngetToken)(doc); - continue; - } - - if (node->attributes == NULL || element->attributes == NULL) - TY_(Report)(doc, element, node, NESTED_EMPHASIS); - } - else if ( TY_(IsPushed)(doc, node) && node->type == StartTag && - nodeIsQ(node) ) - { - /*\ - * Issue #215 - such nested quotes are NOT a problem if HTML5, so - * only issue this warning if NOT HTML5 mode. 
- \*/ - if (TY_(HTMLVersion)(doc) != HT50) - { - TY_(Report)(doc, element, node, NESTED_QUOTATION); - } - } - - if ( TY_(nodeIsText)(node) ) - { - /* only called for 1st child */ - if ( element->content == NULL && !(mode & Preformatted) ) - TrimSpaces( doc, element ); - - if ( node->start >= node->end ) - { - TY_(FreeNode)( doc, node ); - continue; - } - - TY_(InsertNodeAtEnd)(element, node); - continue; - } - - /* mixed content model so allow text */ - if (InsertMisc(element, node)) - continue; - - /* deal with HTML tags */ - if ( nodeIsHTML(node) ) - { - if ( TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED ); - TY_(FreeNode)( doc, node ); - continue; - } - - /* otherwise infer end of inline element */ - TY_(UngetToken)( doc ); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 3 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* within
or
 map 

to
*/ - if ( nodeIsP(node) && - node->type == StartTag && - ( (mode & Preformatted) || - nodeIsDT(element) || - DescendantOf(element, TidyTag_DT ) - ) - ) - { - node->tag = TY_(LookupTagDef)( TidyTag_BR ); - TidyDocFree(doc, node->element); - node->element = TY_(tmbstrdup)(doc->allocator, "br"); - TrimSpaces(doc, element); - TY_(InsertNodeAtEnd)(element, node); - continue; - } - - /*

allowed within

in HTML 4.01 Transitional */ - if ( nodeIsP(node) && - node->type == StartTag && - nodeIsADDRESS(element) ) - { - TY_(ConstrainVersion)( doc, ~VERS_HTML40_STRICT ); - TY_(InsertNodeAtEnd)(element, node); - (*node->tag->parser)( doc, node, mode, no ); - continue; - } - - /* ignore unknown and PARAM tags */ - if ( node->tag == NULL || nodeIsPARAM(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - continue; - } - - if ( nodeIsBR(node) && node->type == EndTag ) - node->type = StartTag; - - if ( node->type == EndTag ) - { - /* coerce
to
*/ - if ( nodeIsBR(node) ) - node->type = StartTag; - else if ( nodeIsP(node) ) - { - /* coerce unmatched

to

*/ - if ( !DescendantOf(element, TidyTag_P) ) - { - TY_(CoerceNode)(doc, node, TidyTag_BR, no, no); - TrimSpaces( doc, element ); - TY_(InsertNodeAtEnd)( element, node ); - node = TY_(InferredTag)(doc, TidyTag_BR); - TY_(InsertNodeAtEnd)( element, node ); /* todo: check this */ - continue; - } - } - else if ( TY_(nodeHasCM)(node, CM_INLINE) - && !nodeIsA(node) - && !TY_(nodeHasCM)(node, CM_OBJECT) - && TY_(nodeHasCM)(element, CM_INLINE) ) - { - /* allow any inline end tag to end current element */ - - /* http://tidy.sf.net/issue/1426419 */ - /* but, like the browser, retain an earlier inline element. - This is implemented by setting the lexer into a mode - where it gets tokens from the inline stack rather than - from the input stream. Check if the scenerio fits. */ - if ( !nodeIsA(element) - && (node->tag != element->tag) - && TY_(IsPushed)( doc, node ) - && TY_(IsPushed)( doc, element ) ) - { - /* we have something like - bold bold and italic italics */ - if ( TY_(SwitchInline)( doc, element, node ) ) - { - TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG); - TY_(UngetToken)( doc ); /* put this back */ - TY_(InlineDup1)( doc, NULL, element ); /* dupe the , after */ - if (!(mode & Preformatted)) - TrimSpaces( doc, element ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 4 %d...\n",in_parse_inline); -#endif - return NULL; /* close , but will re-open it, after */ - } - } - TY_(PopInline)( doc, element ); - - if ( !nodeIsA(element) ) - { - if ( nodeIsA(node) && node->tag != element->tag ) - { - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE ); - TY_(UngetToken)( doc ); - } - else - { - TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG); - TY_(FreeNode)( doc, node); - } - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 5 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* if parent is then discard unexpected inline end tag */ - 
TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } /* special case etc. for stuff moved in front of table */ - else if ( lexer->exiled - && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) ) - { - TY_(UngetToken)( doc ); - TrimSpaces(doc, element); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 6 %d...\n",in_parse_inline); -#endif - return NULL; - } - } - - /* allow any header tag to end current header */ - if ( TY_(nodeHasCM)(node, CM_HEADING) && TY_(nodeHasCM)(element, CM_HEADING) ) - { - - if ( node->tag == element->tag ) - { - TY_(Report)(doc, element, node, NON_MATCHING_ENDTAG ); - TY_(FreeNode)( doc, node); - } - else - { - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE ); - TY_(UngetToken)( doc ); - } - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 7 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* - an tag to ends any open element - but is mapped to - */ - /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */ - /* if (node->tag == doc->tags.tag_a && !node->implicit && TY_(IsPushed)(doc, node)) */ - if ( nodeIsA(node) && !node->implicit && - (nodeIsA(element) || DescendantOf(element, TidyTag_A)) ) - { - /* coerce to unless it has some attributes */ - /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */ - /* other fixes by Dave Raggett */ - /* if (node->attributes == NULL) */ - if (node->type != EndTag && node->attributes == NULL - && cfgBool(doc, TidyCoerceEndTags) ) - { - node->type = EndTag; - TY_(Report)(doc, element, node, COERCE_TO_ENDTAG); - /* TY_(PopInline)( doc, node ); */ - TY_(UngetToken)( doc ); - continue; - } - - TY_(UngetToken)( doc ); - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE); - /* TY_(PopInline)( doc, element ); */ - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - -#if defined(ENABLE_DEBUG_LOG) - 
in_parse_inline--; - SPRTF("Exit ParseInline 8 %d...\n",in_parse_inline); -#endif - return NULL; - } - - if (element->tag->model & CM_HEADING) - { - if ( nodeIsCENTER(node) || nodeIsDIV(node) ) - { - if (!TY_(nodeIsElement)(node)) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN); - - /* insert center as parent if heading is empty */ - if (element->content == NULL) - { - InsertNodeAsParent(element, node); - continue; - } - - /* split heading and make center parent of 2nd part */ - TY_(InsertNodeAfterElement)(element, node); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - - element = TY_(CloneNode)( doc, element ); - TY_(InsertNodeAtEnd)(node, element); - continue; - } - - if ( nodeIsHR(node) ) - { - if ( !TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN); - - /* insert hr before heading if heading is empty */ - if (element->content == NULL) - { - TY_(InsertNodeBeforeElement)(element, node); - continue; - } - - /* split heading and insert hr before 2nd part */ - TY_(InsertNodeAfterElement)(element, node); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - - element = TY_(CloneNode)( doc, element ); - TY_(InsertNodeAfterElement)(node, element); - continue; - } - } - - if ( nodeIsDT(element) ) - { - if ( nodeIsHR(node) ) - { - Node *dd; - if ( !TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN); - dd = TY_(InferredTag)(doc, TidyTag_DD); - - /* insert hr within dd before dt if dt is empty */ - if (element->content == NULL) - { - TY_(InsertNodeBeforeElement)(element, dd); - TY_(InsertNodeAtEnd)(dd, node); - continue; - } - - /* split dt and insert hr within dd 
before 2nd part */ - TY_(InsertNodeAfterElement)(element, dd); - TY_(InsertNodeAtEnd)(dd, node); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - - element = TY_(CloneNode)( doc, element ); - TY_(InsertNodeAfterElement)(dd, element); - continue; - } - } - - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - for (parent = element->parent; - parent != NULL; parent = parent->parent) - { - if (node->tag == parent->tag) - { - if (!(element->tag->model & CM_OPT) && !element->implicit) - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE); - - if( TY_(IsPushedLast)( doc, element, node ) ) - TY_(PopInline)( doc, element ); - TY_(UngetToken)( doc ); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 9 %d...\n",in_parse_inline); -#endif - return NULL; - } - } - } - - /*\ - * block level tags end this element - * Issue #333 - There seems an exception if the element is a 'span', - * and the node just collected is a 'meta'. The 'meta' can not have - * CM_INLINE added, nor can the 'span' have CM_MIXED added without - * big consequences. - * There may be other exceptions to be added... 
- \*/ - if (!(node->tag->model & CM_INLINE) && - !(element->tag->model & CM_MIXED) && - !(nodeIsSPAN(element) && nodeIsMETA(node)) ) - { - if ( !TY_(nodeIsElement)(node) ) - { - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - /* HTML5 */ - if (nodeIsDATALIST(element)) { - TY_(ConstrainVersion)( doc, ~VERS_HTML5 ); - } else - if (!(element->tag->model & CM_OPT)) - TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE); - - if (node->tag->model & CM_HEAD && !(node->tag->model & CM_BLOCK)) - { - MoveToHead(doc, element, node); - continue; - } - - /* - prevent anchors from propagating into block tags - except for headings h1 to h6 - */ - if ( nodeIsA(element) ) - { - if (node->tag && !(node->tag->model & CM_HEADING)) - TY_(PopInline)( doc, element ); - else if (!(element->content)) - { - TY_(DiscardElement)( doc, element ); - TY_(UngetToken)( doc ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 10 %d...\n",in_parse_inline); -#endif - return NULL; - } - } - - TY_(UngetToken)( doc ); - - if (!(mode & Preformatted)) - TrimSpaces(doc, element); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 11 %d...\n",in_parse_inline); -#endif - return NULL; - } - - /* parse inline element */ - if (TY_(nodeIsElement)(node)) - { - if (node->implicit) - TY_(Report)(doc, element, node, INSERTING_TAG); - - /* trim white space before
*/ - if ( nodeIsBR(node) ) - TrimSpaces(doc, element); - - TY_(InsertNodeAtEnd)(element, node); - ParseTag(doc, node, mode); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - continue; - } - - if (!(element->tag->model & CM_OPT)) - TY_(Report)(doc, element, node, MISSING_ENDTAG_FOR); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_inline--; - SPRTF("Exit ParseInline 12 %d...\n",in_parse_inline); -#endif - return NULL; -} - - -/** MARK: TY_(oldParseDefList) - * Parses the `dl` tag. - */ -void* TY_(oldParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode) -{ - Lexer* lexer = doc->lexer; - Node *node, *parent; - - if (list->tag->model & CM_EMPTY) - return NULL; - - lexer->insert = NULL; /* defer implicit inline start tags */ - - while ((node = TY_(GetToken)( doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == list->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - list->closed = yes; - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(list, node)) - continue; - - if (TY_(nodeIsText)(node)) - { - TY_(UngetToken)( doc ); - node = TY_(InferredTag)(doc, TidyTag_DT); - TY_(Report)(doc, list, node, MISSING_STARTTAG); - } - - if (node->tag == NULL) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - Bool discardIt = no; - if ( nodeIsFORM(node) ) - { - BadForm( doc ); - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - continue; - } - - for (parent = list->parent; - parent != NULL; parent = parent->parent) - { - /* Do not match across BODY to avoid infinite loop - between ParseBody and this parser, - See http://tidy.sf.net/bug/1098012. 
*/ - if (nodeIsBODY(parent)) - { - discardIt = yes; - break; - } - if (node->tag == parent->tag) - { - TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE); - - TY_(UngetToken)( doc ); - return NULL; - } - } - if (discardIt) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - } - - /* center in a dt or a dl breaks the dl list in two */ - if ( nodeIsCENTER(node) ) - { - if (list->content) - TY_(InsertNodeAfterElement)(list, node); - else /* trim empty dl list */ - { - TY_(InsertNodeBeforeElement)(list, node); - } - - /* #426885 - fix by Glenn Carroll 19 Apr 00, and - Gary Dechaines 11 Aug 00 */ - /* ParseTag can destroy node, if it finds that - * this
is followed immediately by
. - * It's awkward but necessary to determine if this - * has happened. - */ - parent = node->parent; - - /* and parse contents of center */ - lexer->excludeBlocks = no; - ParseTag( doc, node, mode); - lexer->excludeBlocks = yes; - - /* now create a new dl element, - * unless node has been blown away because the - * center was empty, as above. - */ - if (parent && parent->last == node) - { - list = TY_(InferredTag)(doc, TidyTag_DL); - TY_(InsertNodeAfterElement)(node, list); - } - continue; - } - - if ( !(nodeIsDT(node) || nodeIsDD(node)) ) - { - TY_(UngetToken)( doc ); - - if (!(node->tag->model & (CM_BLOCK | CM_INLINE))) - { - TY_(Report)(doc, list, node, TAG_NOT_ALLOWED_IN); - return NULL; - } - - /* if DD appeared directly in BODY then exclude blocks */ - if (!(node->tag->model & CM_INLINE) && lexer->excludeBlocks) - return NULL; - - node = TY_(InferredTag)(doc, TidyTag_DD); - TY_(Report)(doc, list, node, MISSING_STARTTAG); - } - - if (node->type == EndTag) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* node should be
or
*/ - TY_(InsertNodeAtEnd)(list, node); - ParseTag( doc, node, IgnoreWhitespace); - } - - TY_(Report)(doc, list, node, MISSING_ENDTAG_FOR); - return NULL; -} - - -/** MARK: TY_(oldParseList) - * Parses list tags. - */ -void* TY_(oldParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode)) -{ -#if defined(ENABLE_DEBUG_LOG) - static int in_parse_list = 0; -#endif - Lexer* lexer = doc->lexer; - Node *node, *parent, *lastli; - Bool wasblock; - Bool nodeisOL = nodeIsOL(list); - -#if defined(ENABLE_DEBUG_LOG) - in_parse_list++; - SPRTF("Entering ParseList %d...\n",in_parse_list); -#endif - if (list->tag->model & CM_EMPTY) - { -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 1 %d... CM_EMPTY\n",in_parse_list); -#endif - return NULL; - } - lexer->insert = NULL; /* defer implicit inline start tags */ - - while ((node = TY_(GetToken)( doc, IgnoreWhitespace)) != NULL) - { - Bool foundLI = no; - if (node->tag == list->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - list->closed = yes; -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 2 %d... Endtag\n",in_parse_list); -#endif - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(list, node)) - continue; - - if (node->type != TextNode && node->tag == NULL) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - if (lexer && (node->type == TextNode)) - { - uint ch, ix = node->start; - /* Issue #572 - Skip whitespace. */ - while (ix < node->end && (ch = (lexer->lexbuf[ix] & 0xff)) - && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) - ++ix; - if (ix >= node->end) - { - /* Issue #572 - Discard if ALL whitespace. 
*/ - TY_(FreeNode)(doc, node); - continue; - } - } - - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - if ( nodeIsFORM(node) ) - { - BadForm( doc ); - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node ); - continue; - } - - if (TY_(nodeHasCM)(node,CM_INLINE)) - { - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(PopInline)( doc, node ); - TY_(FreeNode)( doc, node); - continue; - } - - for ( parent = list->parent; - parent != NULL; parent = parent->parent ) - { - /* Do not match across BODY to avoid infinite loop - between ParseBody and this parser, - See http://tidy.sf.net/bug/1053626. */ - if (nodeIsBODY(parent)) - break; - if (node->tag == parent->tag) - { - TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE); - TY_(UngetToken)( doc ); -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 3 %d... No End Tag\n",in_parse_list); -#endif - return NULL; - } - } - - TY_(Report)(doc, list, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( !nodeIsLI(node) && nodeisOL ) - { - /* Issue #572 - A
  1. can have nested
      elements */ - foundLI = FindLastLI(list, &lastli); /* find last
    1. */ - } - - if ( nodeIsLI(node) || (TY_(IsHTML5Mode)(doc) && !foundLI) ) - { - /* node is
    2. OR - Issue #396 - A
        can have Zero or more
      • elements - */ - TY_(InsertNodeAtEnd)(list,node); - } - else - { - TY_(UngetToken)( doc ); - - if (TY_(nodeHasCM)(node,CM_BLOCK) && lexer->excludeBlocks) - { - TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE); -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 4 %d... No End Tag\n",in_parse_list); -#endif - return NULL; - } - /* http://tidy.sf.net/issue/1316307 */ - /* In exiled mode, return so table processing can continue. */ - else if ( lexer->exiled - && (TY_(nodeHasCM)(node, CM_TABLE|CM_ROWGRP|CM_ROW) - || nodeIsTABLE(node)) ) - { -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 5 %d... exiled\n",in_parse_list); -#endif - return NULL; - } - /* http://tidy.sf.net/issue/836462 - If "list" is an unordered list, insert the next tag within - the last
      • to preserve the numbering to match the visual - rendering of most browsers. */ - if ( nodeIsOL(list) && FindLastLI(list, &lastli) ) - { - /* Create a node for error reporting */ - node = TY_(InferredTag)(doc, TidyTag_LI); - TY_(Report)(doc, list, node, MISSING_STARTTAG ); - TY_(FreeNode)( doc, node); - node = lastli; - } - else - { - /* Add an inferred
      • */ - wasblock = TY_(nodeHasCM)(node,CM_BLOCK); - node = TY_(InferredTag)(doc, TidyTag_LI); - /* Add "display: inline" to avoid a blank line after
      • with - Internet Explorer. See http://tidy.sf.net/issue/836462 */ - TY_(AddStyleProperty)( doc, node, - wasblock - ? "list-style: none; display: inline" - : "list-style: none" - ); - TY_(Report)(doc, list, node, MISSING_STARTTAG ); - TY_(InsertNodeAtEnd)(list,node); - } - } - - ParseTag( doc, node, IgnoreWhitespace); - } - - TY_(Report)(doc, list, node, MISSING_ENDTAG_FOR); -#if defined(ENABLE_DEBUG_LOG) - in_parse_list--; - SPRTF("Exit ParseList 6 %d... missing end tag\n",in_parse_list); -#endif - return NULL; -} - - -/** MARK: TY_(oldParseRow) - * Parses the `row` tag. - */ -void* TY_(oldParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)) -{ - Lexer* lexer = doc->lexer; - Node *node; - Bool exclude_state; - - if (row->tag->model & CM_EMPTY) - return NULL; - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == row->tag) - { - if (node->type == EndTag) - { - TY_(FreeNode)( doc, node); - row->closed = yes; - FixEmptyRow( doc, row); - return NULL; - } - - /* New row start implies end of current row */ - TY_(UngetToken)( doc ); - FixEmptyRow( doc, row); - return NULL; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if ( node->type == EndTag ) - { - if ( (TY_(nodeHasCM)(node, CM_HTML|CM_TABLE) || nodeIsTABLE(node)) - && DescendantOf(row, TagId(node)) ) - { - TY_(UngetToken)( doc ); - return NULL; - } - - if ( nodeIsFORM(node) || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) ) - { - if ( nodeIsFORM(node) ) - BadForm( doc ); - - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( nodeIsTD(node) || nodeIsTH(node) ) - { - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - } - - /* deal with comments etc. 
*/ - if (InsertMisc(row, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL && node->type != TextNode) - { - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* discard unexpected
element */ - if ( nodeIsTABLE(node) ) - { - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* THEAD, TFOOT or TBODY */ - if ( TY_(nodeHasCM)(node, CM_ROWGRP) ) - { - TY_(UngetToken)( doc ); - return NULL; - } - - if (node->type == EndTag) - { - TY_(Report)(doc, row, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* - if text or inline or block move before table - if head content move to head - */ - - if (node->type != EndTag) - { - if ( nodeIsFORM(node) ) - { - TY_(UngetToken)( doc ); - node = TY_(InferredTag)(doc, TidyTag_TD); - TY_(Report)(doc, row, node, MISSING_STARTTAG); - } - else if ( TY_(nodeIsText)(node) - || TY_(nodeHasCM)(node, CM_BLOCK | CM_INLINE) ) - { - MoveBeforeTable( doc, row, node ); - TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN); - lexer->exiled = yes; - exclude_state = lexer->excludeBlocks; - lexer->excludeBlocks = no; - - if (node->type != TextNode) - ParseTag( doc, node, IgnoreWhitespace); - - lexer->exiled = no; - lexer->excludeBlocks = exclude_state; - continue; - } - else if (node->tag->model & CM_HEAD) - { - TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN); - MoveToHead( doc, row, node); - continue; - } - } - - if ( !(nodeIsTD(node) || nodeIsTH(node)) ) - { - TY_(Report)(doc, row, node, TAG_NOT_ALLOWED_IN); - TY_(FreeNode)( doc, node); - continue; - } - - /* node should be
or */ - TY_(InsertNodeAtEnd)(row, node); - exclude_state = lexer->excludeBlocks; - lexer->excludeBlocks = no; - ParseTag( doc, node, IgnoreWhitespace); - lexer->excludeBlocks = exclude_state; - - /* pop inline stack */ - - while ( lexer->istacksize > lexer->istackbase ) - TY_(PopInline)( doc, NULL ); - } - return NULL; -} - - -/** MARK: TY_(oldParseRowGroup) - * Parses the `rowgroup` tag. - */ -void* TY_(oldParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSED(mode)) -{ - Lexer* lexer = doc->lexer; - Node *node, *parent; - - if (rowgroup->tag->model & CM_EMPTY) - return NULL; - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == rowgroup->tag) - { - if (node->type == EndTag) - { - rowgroup->closed = yes; - TY_(FreeNode)( doc, node); - return NULL; - } - - TY_(UngetToken)( doc ); - return NULL; - } - - /* if
infer end tag */ - if ( nodeIsTABLE(node) && node->type == EndTag ) - { - TY_(UngetToken)( doc ); - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(rowgroup, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL && node->type != TextNode) - { - TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* - if TD or TH then infer - if text or inline or block move before table - if head content move to head - */ - - if (node->type != EndTag) - { - if ( nodeIsTD(node) || nodeIsTH(node) ) - { - TY_(UngetToken)( doc ); - node = TY_(InferredTag)(doc, TidyTag_TR); - TY_(Report)(doc, rowgroup, node, MISSING_STARTTAG); - } - else if ( TY_(nodeIsText)(node) - || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) ) - { - MoveBeforeTable( doc, rowgroup, node ); - TY_(Report)(doc, rowgroup, node, TAG_NOT_ALLOWED_IN); - lexer->exiled = yes; - - if (node->type != TextNode) - ParseTag(doc, node, IgnoreWhitespace); - - lexer->exiled = no; - continue; - } - else if (node->tag->model & CM_HEAD) - { - TY_(Report)(doc, rowgroup, node, TAG_NOT_ALLOWED_IN); - MoveToHead(doc, rowgroup, node); - continue; - } - } - - /* - if this is the end tag for ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - if ( nodeIsFORM(node) || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) ) - { - if ( nodeIsFORM(node) ) - BadForm( doc ); - - TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( nodeIsTR(node) || nodeIsTD(node) || nodeIsTH(node) ) - { - TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - for ( parent = rowgroup->parent; - parent != NULL; - parent = parent->parent ) - { - if (node->tag == parent->tag) - { - TY_(UngetToken)( doc ); - return NULL; - } - } - } - - /* - if THEAD, TFOOT or TBODY then implied end tag - - */ - if (node->tag->model & CM_ROWGRP) - { - if 
(node->type != EndTag) - { - TY_(UngetToken)( doc ); - return NULL; - } - } - - if (node->type == EndTag) - { - TY_(Report)(doc, rowgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( !nodeIsTR(node) ) - { - node = TY_(InferredTag)(doc, TidyTag_TR); - TY_(Report)(doc, rowgroup, node, MISSING_STARTTAG); - TY_(UngetToken)( doc ); - } - - /* node should be */ - TY_(InsertNodeAtEnd)(rowgroup, node); - ParseTag(doc, node, IgnoreWhitespace); - } - return NULL; -} - - -/** MARK: TY_(oldParseColGroup) - * Parses the `colgroup` tag. - */ -void* TY_(oldParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSED(mode)) -{ - Node *node, *parent; - - if (colgroup->tag->model & CM_EMPTY) - return NULL; - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == colgroup->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - colgroup->closed = yes; - return NULL; - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - if ( nodeIsFORM(node) ) - { - BadForm( doc ); - TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - for ( parent = colgroup->parent; - parent != NULL; - parent = parent->parent ) - { - if (node->tag == parent->tag) - { - TY_(UngetToken)( doc ); - return NULL; - } - } - } - - if (TY_(nodeIsText)(node)) - { - TY_(UngetToken)( doc ); - return NULL; - } - - /* deal with comments etc. 
*/ - if (InsertMisc(colgroup, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL) - { - TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - if ( !nodeIsCOL(node) ) - { - TY_(UngetToken)( doc ); - return NULL; - } - - if (node->type == EndTag) - { - TY_(Report)(doc, colgroup, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* node should be */ - TY_(InsertNodeAtEnd)(colgroup, node); - ParseTag(doc, node, IgnoreWhitespace); - } - return NULL; -} - - -/** MARK: TY_(oldParseTableTag) - * Parses the `table` tag. - */ -void* TY_(oldParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(mode)) -{ -#if defined(ENABLE_DEBUG_LOG) - static int in_parse_table = 0; -#endif - Lexer* lexer = doc->lexer; - Node *node, *parent; - uint istackbase; - - TY_(DeferDup)( doc ); - istackbase = lexer->istackbase; - lexer->istackbase = lexer->istacksize; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table++; - SPRTF("Entering ParseTableTag %d...\n",in_parse_table); -#endif - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == table->tag ) - { - if (node->type == EndTag) - { - TY_(FreeNode)(doc, node); - } - else - { - /* Issue #498 - If a in a
- * just close the current table, and issue a - * warning. The previous action was to discard - * this second
- */ - TY_(UngetToken)(doc); - TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN); - } - lexer->istackbase = istackbase; - table->closed = yes; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table--; - SPRTF("Exit ParseTableTag 1 %d... EndTag\n",in_parse_table); -#endif - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(table, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL && node->type != TextNode) - { - TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* if TD or TH or text or inline or block then infer */ - - if (node->type != EndTag) - { - if ( nodeIsTD(node) || nodeIsTH(node) || nodeIsTABLE(node) ) - { - TY_(UngetToken)( doc ); - node = TY_(InferredTag)(doc, TidyTag_TR); - TY_(Report)(doc, table, node, MISSING_STARTTAG); - } - else if ( TY_(nodeIsText)(node) ||TY_(nodeHasCM)(node,CM_BLOCK|CM_INLINE) ) - { - TY_(InsertNodeBeforeElement)(table, node); - TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN); - lexer->exiled = yes; - - if (node->type != TextNode) - ParseTag(doc, node, IgnoreWhitespace); - - lexer->exiled = no; - continue; - } - else if (node->tag->model & CM_HEAD) - { - MoveToHead(doc, table, node); - continue; - } - } - - /* - if this is the end tag for an ancestor element - then infer end tag for this element - */ - if (node->type == EndTag) - { - if ( nodeIsFORM(node) ) - { - BadForm( doc ); - TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* best to discard unexpected block/inline end tags */ - if ( TY_(nodeHasCM)(node, CM_TABLE|CM_ROW) || - TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) ) - { - TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - for ( parent = table->parent; - parent != NULL; - parent = parent->parent ) - { - if (node->tag == parent->tag) - { - TY_(Report)(doc, table, node, MISSING_ENDTAG_BEFORE ); - TY_(UngetToken)( doc ); - 
lexer->istackbase = istackbase; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table--; - SPRTF("Exit ParseTableTag 2 %d... missing EndTag\n",in_parse_table); -#endif - return NULL; - } - } - } - - if (!(node->tag->model & CM_TABLE)) - { - TY_(UngetToken)( doc ); - TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN); - lexer->istackbase = istackbase; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table--; - SPRTF("Exit ParseTableTag 3 %d... CM_TABLE\n",in_parse_table); -#endif - return NULL; - } - - if (TY_(nodeIsElement)(node)) - { - TY_(InsertNodeAtEnd)(table, node); - ParseTag(doc, node, IgnoreWhitespace); - continue; - } - - /* discard unexpected text nodes and end tags */ - TY_(Report)(doc, table, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - - TY_(Report)(doc, table, node, MISSING_ENDTAG_FOR); - lexer->istackbase = istackbase; -#if defined(ENABLE_DEBUG_LOG) - in_parse_table--; - SPRTF("Exit ParseTableTag 4 %d... missing end\n",in_parse_table); -#endif - return NULL; -} - - -/** MARK: TY_(oldParsePre) - * Parses the `pre` tag. - */ -void* TY_(oldParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) ) -{ - Node *node; - - if (pre->tag->model & CM_EMPTY) - return NULL; - - TY_(InlineDup)( doc, NULL ); /* tell lexer to insert inlines if needed */ - - while ((node = TY_(GetToken)(doc, Preformatted)) != NULL) - { - if ( node->type == EndTag && - (node->tag == pre->tag || DescendantOf(pre, TagId(node))) ) - { - if (nodeIsBODY(node) || nodeIsHTML(node)) - { - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)(doc, node); - continue; - } - if (node->tag == pre->tag) - { - TY_(FreeNode)(doc, node); - } - else - { - TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE ); - TY_(UngetToken)( doc ); - } - pre->closed = yes; - TrimSpaces(doc, pre); - return NULL; - } - - if (TY_(nodeIsText)(node)) - { - TY_(InsertNodeAtEnd)(pre, node); - continue; - } - - /* deal with comments etc. 
*/ - if (InsertMisc(pre, node)) - continue; - - if (node->tag == NULL) - { - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)(doc, node); - continue; - } - - /* strip unexpected tags */ - if ( !PreContent(doc, node) ) - { - Node *newnode; - - /* fix for http://tidy.sf.net/bug/772205 */ - if (node->type == EndTag) - { - /* http://tidy.sf.net/issue/1590220 */ - if ( doc->lexer->exiled - && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) ) - { - TY_(UngetToken)(doc); - TrimSpaces(doc, pre); - return NULL; - } - - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)(doc, node); - continue; - } - /* http://tidy.sf.net/issue/1590220 */ - else if (TY_(nodeHasCM)(node, CM_TABLE|CM_ROW) - || nodeIsTABLE(node) ) - { - if (!doc->lexer->exiled) - /* No missing close warning if exiled. */ - TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE); - - TY_(UngetToken)(doc); - return NULL; - } - - /* - This is basically what Tidy 04 August 2000 did and far more accurate - with respect to browser behaivour than the code commented out above. - Tidy could try to propagate the
<pre> into each disallowed child where
-              <pre> is allowed in order to replicate some browsers behaivour, but
-              there are a lot of exceptions, e.g. Internet Explorer does not propagate
-              <pre> into table cells while Mozilla does. Opera 6 never propagates
-              <pre> into blocklevel elements while Opera 7 behaves much like Mozilla.
-
-              Tidy behaves thus mostly like Opera 6 except for nested <pre> elements
-              which are handled like Mozilla takes them (Opera6 closes all <pre> after
-              the first </pre>).
-
-              There are similar issues like replacing <p> in <pre> with <br>, for
-              example
-
-                <pre>...<p>...</pre>                 (Input)
-                <pre>...<br>...</pre>                (Tidy)
-                <pre>...<br>...</pre>                (Opera 7 and Internet Explorer)
-                <pre>...<br><br>...</pre>            (Opera 6 and Mozilla)
-
-                <pre>...<p>...</p>...</pre>          (Input)
-                <pre>...<br>......</pre>             (Tidy, BUG!)
-                <pre>...<br>...<br>...</pre>         (Internet Explorer)
-                <pre>...<br><br>...<br><br>...</pre> (Mozilla, Opera 6)
-                <pre>...<br>...<br><br>...</pre>     (Opera 7)
-
-              or something similar, they could also be closing the <pre> and propagate
-              the <pre> into the newly opened <p>.
-
-              Todo: IMG, OBJECT, APPLET, BIG, SMALL, SUB, SUP, FONT, and BASEFONT are
-              disallowed in <pre>, Tidy neither detects this nor does it perform any
-              cleanup operation. Tidy should at least issue a warning if it encounters
-              such constructs.
-
-              Todo: discarding </p> is abviously a bug, it should be replaced by <br>
. - */ - TY_(InsertNodeAfterElement)(pre, node); - TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE); - ParseTag(doc, node, IgnoreWhitespace); - - newnode = TY_(InferredTag)(doc, TidyTag_PRE); - TY_(Report)(doc, pre, newnode, INSERTING_TAG); - pre = newnode; - TY_(InsertNodeAfterElement)(node, pre); - - continue; - } - - if ( nodeIsP(node) ) - { - if (node->type == StartTag) - { - TY_(Report)(doc, pre, node, USING_BR_INPLACE_OF); - - /* trim white space before

in

*/
-                TrimSpaces(doc, pre);
-
-                /* coerce both 

and

to
*/ - TY_(CoerceNode)(doc, node, TidyTag_BR, no, no); - TY_(FreeAttrs)( doc, node ); /* discard align attribute etc. */ - TY_(InsertNodeAtEnd)( pre, node ); - } - else - { - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - continue; - } - - if ( TY_(nodeIsElement)(node) ) - { - /* trim white space before
*/ - if ( nodeIsBR(node) ) - TrimSpaces(doc, pre); - - TY_(InsertNodeAtEnd)(pre, node); - ParseTag(doc, node, Preformatted); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - - TY_(Report)(doc, pre, node, MISSING_ENDTAG_FOR); - return NULL; -} - - -/** MARK: TY_(oldParseOptGroup) - * Parses the `optgroup` tag. - */ -void* TY_(oldParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode)) -{ - Lexer* lexer = doc->lexer; - Node *node; - - lexer->insert = NULL; /* defer implicit inline start tags */ - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == field->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - field->closed = yes; - TrimSpaces(doc, field); - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(field, node)) - continue; - - if ( node->type == StartTag && - (nodeIsOPTION(node) || nodeIsOPTGROUP(node)) ) - { - if ( nodeIsOPTGROUP(node) ) - TY_(Report)(doc, field, node, CANT_BE_NESTED); - - TY_(InsertNodeAtEnd)(field, node); - ParseTag(doc, node, MixedContent); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED ); - TY_(FreeNode)( doc, node); - } - return NULL; -} - - -/** MARK: TY_(oldParseSelect) - * Parses the `select` tag. 
- */ -void* TY_(oldParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode)) -{ -#if defined(ENABLE_DEBUG_LOG) - static int in_parse_select = 0; -#endif - Lexer* lexer = doc->lexer; - Node *node; - - lexer->insert = NULL; /* defer implicit inline start tags */ -#if defined(ENABLE_DEBUG_LOG) - in_parse_select++; - SPRTF("Entering ParseSelect %d...\n",in_parse_select); -#endif - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == field->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - field->closed = yes; - TrimSpaces(doc, field); -#if defined(ENABLE_DEBUG_LOG) - in_parse_select--; - SPRTF("Exit ParseSelect 1 %d...\n",in_parse_select); -#endif - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(field, node)) - continue; - - if ( node->type == StartTag && - ( nodeIsOPTION(node) || - nodeIsOPTGROUP(node) || - nodeIsDATALIST(node) || - nodeIsSCRIPT(node)) - ) - { - TY_(InsertNodeAtEnd)(field, node); - ParseTag(doc, node, IgnoreWhitespace); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - - TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR); -#if defined(ENABLE_DEBUG_LOG) - in_parse_select--; - SPRTF("Exit ParseSelect 2 %d...\n",in_parse_select); -#endif - return NULL; -} - - -/** MARK: TY_(oldParseDataList) - * Parses the `datalist` tag. 
- */ -void* TY_(oldParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode)) -{ -#if defined(ENABLE_DEBUG_LOG) - static int in_parse_datalist = 0; -#endif - Lexer* lexer = doc->lexer; - Node *node; - - lexer->insert = NULL; /* defer implicit inline start tags */ -#if defined(ENABLE_DEBUG_LOG) - in_parse_datalist++; - SPRTF("Entering ParseDatalist %d...\n",in_parse_datalist); -#endif - - while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL) - { - if (node->tag == field->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - field->closed = yes; - TrimSpaces(doc, field); -#if defined(ENABLE_DEBUG_LOG) - in_parse_datalist--; - SPRTF("Exit ParseDatalist 1 %d...\n",in_parse_datalist); -#endif - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(field, node)) - continue; - - if ( node->type == StartTag && - ( nodeIsOPTION(node) || - nodeIsOPTGROUP(node) || - nodeIsDATALIST(node) || - nodeIsSCRIPT(node)) - ) - { - TY_(InsertNodeAtEnd)(field, node); - ParseTag(doc, node, IgnoreWhitespace); - continue; - } - - /* discard unexpected tags */ - TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - } - - TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR); -#if defined(ENABLE_DEBUG_LOG) - in_parse_datalist--; - SPRTF("Exit ParseDatalist 2 %d...\n",in_parse_datalist); -#endif - return NULL; -} - - -/** MARK: TY_(oldParseText) - * Parses the `option` and `textarea` tags. 
- */ -void* TY_(oldParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode) -{ - Lexer* lexer = doc->lexer; - Node *node; - - lexer->insert = NULL; /* defer implicit inline start tags */ - - if ( nodeIsTEXTAREA(field) ) - mode = Preformatted; - else - mode = MixedContent; /* kludge for font tags */ - - while ((node = TY_(GetToken)(doc, mode)) != NULL) - { - if (node->tag == field->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - field->closed = yes; - TrimSpaces(doc, field); - return NULL; - } - - /* deal with comments etc. */ - if (InsertMisc(field, node)) - continue; - - if (TY_(nodeIsText)(node)) - { - /* only called for 1st child */ - if (field->content == NULL && !(mode & Preformatted)) - TrimSpaces(doc, field); - - if (node->start >= node->end) - { - TY_(FreeNode)( doc, node); - continue; - } - - TY_(InsertNodeAtEnd)(field, node); - continue; - } - - /* for textarea should all cases of < and & be escaped? */ - - /* discard inline tags e.g. font */ - if ( node->tag - && node->tag->model & CM_INLINE - && !(node->tag->model & CM_FIELD)) /* #487283 - fix by Lee Passey 25 Jan 02 */ - { - TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* terminate element on other tags */ - if (!(field->tag->model & CM_OPT)) - TY_(Report)(doc, field, node, MISSING_ENDTAG_BEFORE); - - TY_(UngetToken)( doc ); - TrimSpaces(doc, field); - return NULL; - } - - if (!(field->tag->model & CM_OPT)) - TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR); - return NULL; -} - - -/** MARK: TY_(oldParseTitle) - * Parses the `title` tag. 
- */ -void* TY_(oldParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode)) -{ - Node *node; - while ((node = TY_(GetToken)(doc, MixedContent)) != NULL) - { - if (node->tag == title->tag && node->type == StartTag - && cfgBool(doc, TidyCoerceEndTags) ) - { - TY_(Report)(doc, title, node, COERCE_TO_ENDTAG); - node->type = EndTag; - TY_(UngetToken)( doc ); - continue; - } - else if (node->tag == title->tag && node->type == EndTag) - { - TY_(FreeNode)( doc, node); - title->closed = yes; - TrimSpaces(doc, title); - return NULL; - } - - if (TY_(nodeIsText)(node)) - { - /* only called for 1st child */ - if (title->content == NULL) - TrimInitialSpace(doc, title, node); - - if (node->start >= node->end) - { - TY_(FreeNode)( doc, node); - continue; - } - - TY_(InsertNodeAtEnd)(title, node); - continue; - } - - /* deal with comments etc. */ - if (InsertMisc(title, node)) - continue; - - /* discard unknown tags */ - if (node->tag == NULL) - { - TY_(Report)(doc, title, node, DISCARDING_UNEXPECTED); - TY_(FreeNode)( doc, node); - continue; - } - - /* pushback unexpected tokens */ - TY_(Report)(doc, title, node, MISSING_ENDTAG_BEFORE); - TY_(UngetToken)( doc ); - TrimSpaces(doc, title); - return NULL; - } - - TY_(Report)(doc, title, node, MISSING_ENDTAG_FOR); - return NULL; -} - - -/** MARK: TY_(oldParseScript) - * Parses the `script` tag. - * - * @todo This isn't quite right for CDATA content as it recognises tags - * within the content and parses them accordingly. This will unfortunately - * screw up scripts which include: - * < + letter - * < + ! - * < + ? - * < + / + letter - */ -void* TY_(oldParseScript)(TidyDocImpl* doc, Node *script, GetTokenMode ARG_UNUSED(mode)) -{ - Node *node; - - doc->lexer->parent = script; - node = TY_(GetToken)(doc, CdataContent); - doc->lexer->parent = NULL; - - if (node) - { - TY_(InsertNodeAtEnd)(script, node); - } - else - { - /* handle e.g. a document like "