From 132fb352b16d2c96f91e7baa07bd5ec3b10faa4e Mon Sep 17 00:00:00 2001 From: Jim Derry Date: Mon, 16 Aug 2021 09:40:19 -0400 Subject: [PATCH] The XML Parser and XML Pretty Printer are now non-recursive. --- include/tidyplatform.h | 4 + .../cases/dev-cases/case-005.conf | 5 + .../cases/dev-cases/case-005@0.xml | 123 ++++++ .../case-005.txt} | 0 .../cases/dev-expects/case-005.xml | 102 +++++ .../cases/legacy-cases/case-480406.conf | 3 - .../cases/legacy-cases/case-480406@0.xml | 4 - .../cases/legacy-cases/case-634889.conf | 10 - .../cases/legacy-cases/case-634889@1.html | 9 - .../cases/legacy-cases/case-646946@0.xml | 6 - .../cases/legacy-expects/case-480406.xml | 3 - .../cases/legacy-expects/case-634889.html | 9 - .../cases/legacy-expects/case-634889.txt | 15 - .../cases/legacy-expects/case-646946.txt | 12 - .../cases/legacy-expects/case-646946.xml | 5 - src/parser.c | 376 ++++++++++-------- src/parser.h | 42 +- src/pprint.c | 217 ++++++---- src/tidylib.c | 16 +- 19 files changed, 639 insertions(+), 322 deletions(-) create mode 100755 regression_testing/cases/dev-cases/case-005.conf create mode 100644 regression_testing/cases/dev-cases/case-005@0.xml rename regression_testing/cases/{legacy-expects/case-480406.txt => dev-expects/case-005.txt} (100%) create mode 100644 regression_testing/cases/dev-expects/case-005.xml delete mode 100644 regression_testing/cases/legacy-cases/case-480406.conf delete mode 100644 regression_testing/cases/legacy-cases/case-480406@0.xml delete mode 100644 regression_testing/cases/legacy-cases/case-634889.conf delete mode 100644 regression_testing/cases/legacy-cases/case-634889@1.html delete mode 100644 regression_testing/cases/legacy-cases/case-646946@0.xml delete mode 100644 regression_testing/cases/legacy-expects/case-480406.xml delete mode 100644 regression_testing/cases/legacy-expects/case-634889.html delete mode 100644 regression_testing/cases/legacy-expects/case-634889.txt delete mode 100644 regression_testing/cases/legacy-expects/case-646946.txt delete mode 100644 regression_testing/cases/legacy-expects/case-646946.xml diff --git a/include/tidyplatform.h b/include/tidyplatform.h index 6fa7be4..2fca4c4 100644 --- a/include/tidyplatform.h +++ b/include/tidyplatform.h @@ -611,6 +611,10 @@ extern "C" { # define TIDY_THREAD_LOCAL __thread #endif +#ifndef TIDY_INDENTATION_LIMIT +# define TIDY_INDENTATION_LIMIT 50 +#endif + typedef unsigned char byte; typedef uint tchar; /* single, full character */ diff --git a/regression_testing/cases/dev-cases/case-005.conf b/regression_testing/cases/dev-cases/case-005.conf new file mode 100755 index 0000000..eaf1e60 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-005.conf @@ -0,0 +1,5 @@ +# Config for test case. +tidy-mark: no +indent: yes +wrap: 999 +input-xml: yes diff --git a/regression_testing/cases/dev-cases/case-005@0.xml b/regression_testing/cases/dev-cases/case-005@0.xml new file mode 100644 index 0000000..3427ec2 --- /dev/null +++ b/regression_testing/cases/dev-cases/case-005@0.xml @@ -0,0 +1,123 @@ + + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. + + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. + + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + + diff --git a/regression_testing/cases/legacy-expects/case-480406.txt b/regression_testing/cases/dev-expects/case-005.txt similarity index 100% rename from regression_testing/cases/legacy-expects/case-480406.txt rename to regression_testing/cases/dev-expects/case-005.txt diff --git a/regression_testing/cases/dev-expects/case-005.xml b/regression_testing/cases/dev-expects/case-005.xml new file mode 100644 index 0000000..e6d2e71 --- /dev/null +++ b/regression_testing/cases/dev-expects/case-005.xml @@ -0,0 +1,102 @@ + + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant. + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, battle one another for control of England. Sequel to Oberon's Legacy. + + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg Uncertainty Device, James Salway discovers the problems of being quantum. + + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in detail, with attention to XML DOM interfaces, XSLT processing, SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment. + + diff --git a/regression_testing/cases/legacy-cases/case-480406.conf b/regression_testing/cases/legacy-cases/case-480406.conf deleted file mode 100644 index 50bc5f5..0000000 --- a/regression_testing/cases/legacy-cases/case-480406.conf +++ /dev/null @@ -1,3 +0,0 @@ -// Tidy configuration file for bug #480406 -input-xml: yes -output-xml: yes diff --git a/regression_testing/cases/legacy-cases/case-480406@0.xml b/regression_testing/cases/legacy-cases/case-480406@0.xml deleted file mode 100644 index 63acee1..0000000 --- a/regression_testing/cases/legacy-cases/case-480406@0.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/regression_testing/cases/legacy-cases/case-634889.conf b/regression_testing/cases/legacy-cases/case-634889.conf deleted file mode 100644 index 6ca6d8d..0000000 --- a/regression_testing/cases/legacy-cases/case-634889.conf +++ /dev/null @@ -1,10 +0,0 @@ -tidy-mark: no -output-xml: yes -drop-proprietary-attributes: no -new-inline-tags: o:lock, o:p, v-f, v-formula, v-formulas, - v-imagedata, v-path, v-shape, v-shapetype, v-stroke -new-empty-tags: -new-blocklevel-tags: -new-pre-tags: -wrap-sections: no -drop-empty-paras: no diff --git a/regression_testing/cases/legacy-cases/case-634889@1.html b/regression_testing/cases/legacy-cases/case-634889@1.html deleted file mode 100644 index 1747b79..0000000 --- a/regression_testing/cases/legacy-cases/case-634889@1.html +++ /dev/null @@ -1,9 +0,0 @@ - - - [ 634889 ] Problem with <o:p> ms word tag - - -

Probably OK, now that ParseTagNames() is fixed.

- - - diff --git a/regression_testing/cases/legacy-cases/case-646946@0.xml b/regression_testing/cases/legacy-cases/case-646946@0.xml deleted file mode 100644 index 05d6fb5..0000000 --- a/regression_testing/cases/legacy-cases/case-646946@0.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - diff --git a/regression_testing/cases/legacy-expects/case-480406.xml b/regression_testing/cases/legacy-expects/case-480406.xml deleted file mode 100644 index fc8fb78..0000000 --- a/regression_testing/cases/legacy-expects/case-480406.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/regression_testing/cases/legacy-expects/case-634889.html b/regression_testing/cases/legacy-expects/case-634889.html deleted file mode 100644 index 3157677..0000000 --- a/regression_testing/cases/legacy-expects/case-634889.html +++ /dev/null @@ -1,9 +0,0 @@ - - -[ 634889 ] Problem with <o:p> ms word tag - - -

Probably OK, now that ParseTagNames() is fixed.

- - - diff --git a/regression_testing/cases/legacy-expects/case-634889.txt b/regression_testing/cases/legacy-expects/case-634889.txt deleted file mode 100644 index 218cfe6..0000000 --- a/regression_testing/cases/legacy-expects/case-634889.txt +++ /dev/null @@ -1,15 +0,0 @@ -line 1 column 1 - Warning: missing declaration -line 7 column 3 - Warning: is not approved by W3C -Info: Document content looks like XHTML5 -Tidy found 2 warnings and 0 errors! - -About HTML Tidy: https://github.com/htacg/tidy-html5 -Bug reports and comments: https://github.com/htacg/tidy-html5/issues -Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ -Latest HTML specification: https://html.spec.whatwg.org/multipage/ -Validate your HTML documents: https://validator.w3.org/nu/ -Lobby your company to join the W3C: https://www.w3.org/Consortium - -Do you speak a language other than English, or a different variant of -English? Consider helping us to localize HTML Tidy. For details please see -https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/legacy-expects/case-646946.txt b/regression_testing/cases/legacy-expects/case-646946.txt deleted file mode 100644 index 3425d35..0000000 --- a/regression_testing/cases/legacy-expects/case-646946.txt +++ /dev/null @@ -1,12 +0,0 @@ -No warnings or errors were found. - -About HTML Tidy: https://github.com/htacg/tidy-html5 -Bug reports and comments: https://github.com/htacg/tidy-html5/issues -Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/ -Latest HTML specification: https://html.spec.whatwg.org/multipage/ -Validate your HTML documents: https://validator.w3.org/nu/ -Lobby your company to join the W3C: https://www.w3.org/Consortium - -Do you speak a language other than English, or a different variant of -English? Consider helping us to localize HTML Tidy. For details please see -https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md diff --git a/regression_testing/cases/legacy-expects/case-646946.xml b/regression_testing/cases/legacy-expects/case-646946.xml deleted file mode 100644 index bb88916..0000000 --- a/regression_testing/cases/legacy-expects/case-646946.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - diff --git a/src/parser.c b/src/parser.c index cb02e04..9844ab2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -28,6 +28,14 @@ #define showingBodyOnly(doc) (cfgAutoBool(doc,TidyBodyOnly) == TidyYesState) ? yes : no +/****************************************************************************//* + ** MARK: - Forward Declarations + ***************************************************************************/ + + +static Node* ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode); + + /****************************************************************************//* ** MARK: - Node Operations ***************************************************************************/ @@ -858,7 +866,7 @@ static void growParserStack( TidyDocImpl* doc ) /** * Indicates whether or not the stack is empty. */ -static inline Bool isEmptyParserStack( TidyDocImpl* doc ) +Bool TY_(isEmptyParserStack)( TidyDocImpl* doc ) { return doc->stack.top < 0; } @@ -867,7 +875,7 @@ static inline Bool isEmptyParserStack( TidyDocImpl* doc ) /** * Peek at the parser memory. */ -static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc ) +TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc ) { return doc->stack.content[doc->stack.top]; } @@ -877,7 +885,7 @@ static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc ) * Peek at the parser memory "identity" field. This is just a convenience * to avoid having to create a new struct instance in the caller. */ -static inline Parser* peekMemoryIdentity( TidyDocImpl* doc ) +Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc ) { return doc->stack.content[doc->stack.top].identity; } @@ -887,7 +895,7 @@ static inline Parser* peekMemoryIdentity( TidyDocImpl* doc ) * Peek at the parser memory "mode" field. This is just a convenience * to avoid having to create a new struct instance in the caller. */ -static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc ) +GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc ) { return doc->stack.content[doc->stack.top].mode; } @@ -896,12 +904,23 @@ static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc ) /** * Pop out a parser memory. */ -static TidyParserMemory popMemory( TidyDocImpl* doc ) +TidyParserMemory TY_(popMemory)( TidyDocImpl* doc ) { - if ( !isEmptyParserStack( doc ) ) + if ( !TY_(isEmptyParserStack)( doc ) ) { TidyParserMemory data = doc->stack.content[doc->stack.top]; - DEBUG_LOG(SPRTF("\n<--POP %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top - 1 )); + DEBUG_LOG(SPRTF("\n" + "<--POP original: %s @ %p\n" + " reentry: %s @ %p\n" + " stack depth: %lu @ %p\n" + " register 1: %i\n" + " register 2: %i\n\n", + data.original_node ? data.original_node->element : "none", data.original_node, + data.reentry_node ? data.reentry_node->element : "none", data.reentry_node, + doc->stack.top, &doc->stack.content[doc->stack.top], + data.register_1, + data.register_2 + )); doc->stack.top = doc->stack.top - 1; return data; } @@ -913,7 +932,7 @@ static TidyParserMemory popMemory( TidyDocImpl* doc ) /** * Push the parser memory to the stack. */ -static void pushMemory( TidyDocImpl* doc, TidyParserMemory data ) +void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data ) { if ( doc->stack.top == doc->stack.size - 1 ) growParserStack( doc ); @@ -921,7 +940,18 @@ static void pushMemory( TidyDocImpl* doc, TidyParserMemory data ) doc->stack.top++; doc->stack.content[doc->stack.top] = data; - DEBUG_LOG(SPRTF("\n-->PUSH %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top )); + DEBUG_LOG(SPRTF("\n" + "-->PUSH original: %s @ %p\n" + " reentry: %s @ %p\n" + " stack depth: %lu @ %p\n" + " register 1: %i\n" + " register 2: %i\n\n", + data.original_node ? data.original_node->element : "none", data.original_node, + data.reentry_node ? data.reentry_node->element : "none", data.reentry_node, + doc->stack.top, &doc->stack.content[doc->stack.top], + data.register_1, + data.register_2 + )); } @@ -938,6 +968,9 @@ static Parser* GetParserForNode( TidyDocImpl* doc, Node *node ) { Lexer* lexer = doc->lexer; + if ( cfgBool( doc, TidyXmlTags ) ) + return ParseXMLElement; + /* [i_a]2 prevent crash for active content (php, asp) docs */ if (!node || node->tag == NULL) return NULL; @@ -1008,9 +1041,9 @@ void ParseHTMLWithNode( TidyDocImpl* doc, Node* node ) We weren't given a node, which means this particular leaf is bottomed out. We'll re-enter the parsers using information from the stack. */ - if ( !isEmptyParserStack(doc)) + if ( !TY_(isEmptyParserStack)(doc)) { - parser = peekMemoryIdentity(doc); + parser = TY_(peekMemoryIdentity)(doc); if (parser) { continue; @@ -1018,8 +1051,8 @@ void ParseHTMLWithNode( TidyDocImpl* doc, Node* node ) else { /* No parser means we're only passing back a parsing mode. */ - mode = peekMemoryMode( doc ); - popMemory( doc ); + mode = TY_(peekMemoryMode)( doc ); + TY_(popMemory)( doc ); } } @@ -1065,7 +1098,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) if ( element == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); node = memory.reentry_node; /* Throwaway, because the loop overwrites this immediately. */ mode = memory.reentry_mode; element = memory.original_node; @@ -1563,7 +1596,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) memory.reentry_node = node; memory.reentry_mode = mode; memory.original_node = element; - pushMemory(doc, memory); + TY_(pushMemory)(doc, memory); DEBUG_LOG(SPRTF("<<element)); } return node; @@ -1621,11 +1654,11 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode ) */ if ( body == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ body = memory.original_node; - checkstack = memory.register_b_1; - iswhitenode = memory.register_b_2; + checkstack = memory.register_1; + iswhitenode = memory.register_2; mode = memory.mode; DEBUG_LOG(SPRTF(">>>Re-Enter ParseBody with %s\n", node->element)); } @@ -1691,10 +1724,10 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode ) memory.identity = TY_(ParseBody); memory.original_node = body; memory.reentry_node = node; - memory.register_b_1 = checkstack; - memory.register_b_2 = iswhitenode; + memory.register_1 = checkstack; + memory.register_2 = iswhitenode; memory.mode = mode; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); return node; } @@ -1907,10 +1940,10 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode ) memory.identity = TY_(ParseBody); memory.original_node = body; memory.reentry_node = node; - memory.register_b_1 = checkstack; - memory.register_b_2 = iswhitenode; + memory.register_1 = checkstack; + memory.register_2 = iswhitenode; memory.mode = mode; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); } DEBUG_LOG(SPRTF("<<element)); return node; @@ -1944,7 +1977,7 @@ Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNU */ if ( colgroup == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ colgroup = memory.original_node; mode = memory.mode; @@ -2034,7 +2067,7 @@ Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNU memory.original_node = colgroup; memory.reentry_node = node; memory.mode = mode; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); } DEBUG_LOG(SPRTF("<<element)); return node; @@ -2061,7 +2094,7 @@ Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED if ( field == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); field = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ DEBUG_LOG(SPRTF(">>>Re-Enter ParseDataList with %s\n", node->element)); @@ -2103,7 +2136,7 @@ Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED memory.reentry_mode = IgnoreWhitespace; TY_(InsertNodeAtEnd)(field, node); - pushMemory(doc, memory); + TY_(pushMemory)(doc, memory); return node; } @@ -2144,7 +2177,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode ) if ( list == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); list = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ state = memory.reentry_state; @@ -2272,7 +2305,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode ) memory.original_node = list; memory.reentry_node = node; memory.reentry_state = STATE_POST_NODEISCENTER; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -2315,7 +2348,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode ) memory.original_node = list; memory.reentry_node = node; memory.reentry_state = STATE_INITIAL; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -2402,7 +2435,7 @@ Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNU */ if ( frameset == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); node = memory.reentry_node; /* Throwaway, because we replace it entering the loop. */ frameset = memory.original_node; DEBUG_LOG(SPRTF(">>>Re-Enter ParseFrameSet with %s\n", node->element)); @@ -2468,7 +2501,7 @@ Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNU memory.original_node = frameset; memory.reentry_node = node; memory.mode = MixedContent; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -2509,11 +2542,11 @@ Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode if ( head == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); head = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ - HasTitle = memory.register_b_1; - HasBase = memory.register_b_2; + HasTitle = memory.register_1; + HasBase = memory.register_2; DEBUG_LOG(SPRTF(">>>Re-Enter ParseHead with %s\n", node->element)); } else @@ -2622,9 +2655,9 @@ Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode memory.identity = TY_(ParseHead); memory.original_node = head; memory.reentry_node = node; - memory.register_b_1 = HasTitle; - memory.register_b_2 = HasBase; - pushMemory( doc, memory ); + memory.register_1 = HasTitle; + memory.register_2 = HasBase; + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -2684,7 +2717,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode ) */ if ( html == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); node = memory.reentry_node; mode = memory.reentry_mode; state = memory.reentry_state; @@ -2956,7 +2989,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode ) memory.reentry_mode = mode; memory.reentry_state = STATE_PARSE_HEAD_REENTER; TY_(InsertNodeAtEnd)(html, node); - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -3693,7 +3726,7 @@ Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode if ( list == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); list = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ DEBUG_LOG(SPRTF(">>>Re-Enter ParseList with %s\n", node->element)); @@ -3859,7 +3892,7 @@ Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode memory.original_node = list; memory.reentry_node = node; memory.mode = IgnoreWhitespace; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4041,11 +4074,11 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode ) */ if ( noframes == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); node = memory.reentry_node; /* Throwaway, because we replace it entering the loop anyway.*/ noframes = memory.original_node; state = memory.reentry_state; - body_seen = memory.register_b_1; + body_seen = memory.register_1; DEBUG_LOG(SPRTF(">>>Re-Enter ParseNoFrames with %s\n", node->element)); } else @@ -4123,11 +4156,11 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode ) memory.original_node = noframes; memory.reentry_node = node; memory.reentry_state = STATE_POST_NODEISBODY; - memory.register_b_1 = lexer->seenEndBody; + memory.register_1 = lexer->seenEndBody; memory.mode = IgnoreWhitespace; TY_(InsertNodeAtEnd)(noframes, node); - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4168,7 +4201,7 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode ) memory.reentry_node = node; memory.mode = IgnoreWhitespace; /*MixedContent*/ memory.reentry_state = STATE_INITIAL; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4220,7 +4253,7 @@ Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED if ( field == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); field = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ DEBUG_LOG(SPRTF(">>>Re-Enter ParseOptGroup with %s\n", node->element)); @@ -4259,7 +4292,7 @@ Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED memory.identity = TY_(ParseOptGroup); memory.original_node = field; memory.reentry_node = node; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4293,7 +4326,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) if ( pre == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); pre = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ state = memory.reentry_state; @@ -4446,7 +4479,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) memory.original_node = pre; memory.reentry_node = node; memory.reentry_state = STATE_RENTRY_ACTION; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4488,7 +4521,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) memory.original_node = pre; memory.reentry_node = node; memory.reentry_state = STATE_INITIAL; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4548,11 +4581,11 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode) if ( row == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); row = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ state = memory.reentry_state; - exclude_state = memory.register_b_1; + exclude_state = memory.register_1; DEBUG_LOG(SPRTF(">>>Re-Enter ParseRow with %s\n", node->element)); } else @@ -4692,8 +4725,8 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode) memory.original_node = row; memory.reentry_node = node; memory.reentry_state = STATE_POST_NOT_ENDTAG; - memory.register_b_1 = exclude_state; - pushMemory( doc, memory ); + memory.register_1 = exclude_state; + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4727,8 +4760,8 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode) memory.original_node = row; memory.reentry_node = node; memory.reentry_state = STATE_POST_TD_TH; - memory.register_b_1 = exclude_state; - pushMemory( doc, memory ); + memory.register_1 = exclude_state; + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4792,7 +4825,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU if ( rowgroup == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); rowgroup = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ state = memory.reentry_state; @@ -4887,7 +4920,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU memory.original_node = rowgroup; memory.reentry_node = node; memory.reentry_state = STATE_POST_NOT_TEXTNODE; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -4973,7 +5006,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU memory.original_node = rowgroup; memory.reentry_node = node; memory.reentry_state = STATE_INITIAL; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } break; @@ -5067,7 +5100,7 @@ Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m if ( field == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); field = memory.original_node; node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ DEBUG_LOG(SPRTF(">>>Re-Enter ParseSelect with %s\n", node->element)); @@ -5108,7 +5141,7 @@ Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m memory.reentry_node = node; TY_(InsertNodeAtEnd)(field, node); - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -5144,10 +5177,10 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED if ( table == NULL ) { - TidyParserMemory memory = popMemory( doc ); + TidyParserMemory memory = TY_(popMemory)( doc ); node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ table = memory.original_node; - lexer->exiled = memory.register_b_1; + lexer->exiled = memory.register_1; DEBUG_LOG(SPRTF(">>>Re-Enter ParseTableTag with %s\n", node->element)); } else @@ -5219,9 +5252,9 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED memory.identity = TY_(ParseTableTag); memory.original_node = table; memory.reentry_node = node; - memory.register_b_1 = no; /* later, lexer->exiled = no */ + memory.register_1 = no; /* later, lexer->exiled = no */ memory.mode = IgnoreWhitespace; - pushMemory( doc, memory ); + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -5292,8 +5325,8 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED memory.identity = TY_(ParseTableTag); memory.original_node = table; memory.reentry_node = node; - memory.register_b_1 = lexer->exiled; - pushMemory( doc, memory ); + memory.register_1 = lexer->exiled; + TY_(pushMemory)( doc, memory ); DEBUG_LOG(SPRTF("<<element)); return node; } @@ -5457,6 +5490,116 @@ Node* TY_(ParseTitle)( TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mo } +/** MARK: ParseXMLElement + * Parses the given XML element. + */ +static Node* ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode) +{ + Lexer* lexer = doc->lexer; + Node *node; + + if ( element == NULL ) + { + TidyParserMemory memory = TY_(popMemory)( doc ); + element = memory.original_node; + node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */ + mode = memory.reentry_mode; + TY_(InsertNodeAtEnd)(element, node); /* The only re-entry action needed. */ + } + else + { + /* if node is pre or has xml:space="preserve" then do so */ + if ( TY_(XMLPreserveWhiteSpace)(doc, element) ) + mode = Preformatted; + + /* deal with comments etc. */ + InsertMisc( &doc->root, element); + + /* we shouldn't have plain text at this point. */ + if (TY_(nodeIsText)(element)) + { + TY_(Report)(doc, &doc->root, element, DISCARDING_UNEXPECTED); + TY_(FreeNode)( doc, element); + return NULL; + } + } + while ((node = TY_(GetToken)(doc, mode)) != NULL) + { + if (node->type == EndTag && + node->element && element->element && + TY_(tmbstrcmp)(node->element, element->element) == 0) + { + TY_(FreeNode)( doc, node); + element->closed = yes; + break; + } + + /* discard unexpected end tags */ + if (node->type == EndTag) + { + if (element) + TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_IN); + else + TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_ERR); + + TY_(FreeNode)( doc, node); + continue; + } + + /* parse content on seeing start tag */ + if (node->type == StartTag) + { + TidyParserMemory memory = {0}; + memory.identity = ParseXMLElement; + memory.original_node = element; + memory.reentry_node = node; + memory.reentry_mode = mode; + TY_(pushMemory)( doc, memory ); + return node; + } + + TY_(InsertNodeAtEnd)(element, node); + } /* while */ + + /* + if first child is text then trim initial space and + delete text node if it is empty. + */ + + node = element->content; + + if (TY_(nodeIsText)(node) && mode != Preformatted) + { + if ( lexer->lexbuf[node->start] == ' ' ) + { + node->start++; + + if (node->start >= node->end) + TY_(DiscardElement)( doc, node ); + } + } + + /* + if last child is text then trim final space and + delete the text node if it is empty + */ + + node = element->last; + + if (TY_(nodeIsText)(node) && mode != Preformatted) + { + if ( lexer->lexbuf[node->end - 1] == ' ' ) + { + node->end--; + + if (node->start >= node->end) + TY_(DiscardElement)( doc, node ); + } + } + return NULL; +} + + /***************************************************************************//* ** MARK: - Post-Parse Operations ***************************************************************************/ @@ -6101,87 +6244,6 @@ void TY_(ParseDocument)(TidyDocImpl* doc) } -/** MARK: TY_(ParseXMLElement) - * Parses the given XML element. - */ -static void ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode) -{ - Lexer* lexer = doc->lexer; - Node *node; - - /* if node is pre or has xml:space="preserve" then do so */ - - if ( TY_(XMLPreserveWhiteSpace)(doc, element) ) - mode = Preformatted; - - while ((node = TY_(GetToken)(doc, mode)) != NULL) - { - if (node->type == EndTag && - node->element && element->element && - TY_(tmbstrcmp)(node->element, element->element) == 0) - { - TY_(FreeNode)( doc, node); - element->closed = yes; - break; - } - - /* discard unexpected end tags */ - if (node->type == EndTag) - { - if (element) - TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_IN); - else - TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_ERR); - - TY_(FreeNode)( doc, node); - continue; - } - - /* parse content on seeing start tag */ - if (node->type == StartTag) - ParseXMLElement( doc, node, mode ); - - TY_(InsertNodeAtEnd)(element, node); - } - - /* - if first child is text then trim initial space and - delete text node if it is empty. - */ - - node = element->content; - - if (TY_(nodeIsText)(node) && mode != Preformatted) - { - if ( lexer->lexbuf[node->start] == ' ' ) - { - node->start++; - - if (node->start >= node->end) - TY_(DiscardElement)( doc, node ); - } - } - - /* - if last child is text then trim final space and - delete the text node if it is empty - */ - - node = element->last; - - if (TY_(nodeIsText)(node) && mode != Preformatted) - { - if ( lexer->lexbuf[node->end - 1] == ' ' ) - { - node->end--; - - if (node->start >= node->end) - TY_(DiscardElement)( doc, node ); - } - } -} - - /** MARK: TY_(ParseXMLDocument) * Parses the document using Tidy's XML parser. */ @@ -6232,7 +6294,7 @@ void TY_(ParseXMLDocument)(TidyDocImpl* doc) if (node->type == StartTag) { TY_(InsertNodeAtEnd)( &doc->root, node ); - ParseXMLElement( doc, node, IgnoreWhitespace ); + ParseHTMLWithNode( doc, node ); continue; } diff --git a/src/parser.h b/src/parser.h index 0ccec79..8980372 100644 --- a/src/parser.h +++ b/src/parser.h @@ -55,8 +55,8 @@ typedef struct _TidyParserMemory GetTokenMode reentry_mode; /**< The token mode to use when re-entering. */ int reentry_state; /**< State to set during re-entry. Defined locally in each parser. */ GetTokenMode mode; /**< The caller will peek at this value to get the correct mode. */ - Bool register_b_1; /**< Local variable storage. */ - Bool register_b_2; /**< Local variable storage. */ + int register_1; /**< Local variable storage. */ + int register_2; /**< Local variable storage. */ } TidyParserMemory; @@ -86,6 +86,44 @@ void TY_(InitParserStack)( TidyDocImpl* doc ); void TY_(FreeParserStack)( TidyDocImpl* doc ); +/** + * Indicates whether or not the stack is empty. + */ +Bool TY_(isEmptyParserStack)( TidyDocImpl* doc ); + + +/** + * Peek at the parser memory. + */ +TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc ); + + +/** + * Peek at the parser memory "identity" field. This is just a convenience + * to avoid having to create a new struct instance in the caller. + */ +Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc ); + + +/** + * Peek at the parser memory "mode" field. This is just a convenience + * to avoid having to create a new struct instance in the caller. + */ +GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc ); + + +/** + * Pop out a parser memory. + */ +TidyParserMemory TY_(popMemory)( TidyDocImpl* doc ); + + +/** + * Push the parser memory to the stack. + */ +void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data ); + + /** * Is used to perform a node integrity check recursively after parsing * an HTML or XML document. diff --git a/src/pprint.c b/src/pprint.c index c433db3..e123c34 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -16,6 +16,7 @@ #include "entities.h" #include "tmbstr.h" #include "utf8.h" +#include "sprtf.h" /* *** FOR DEBUG ONLY *** */ /* #define DEBUG_PPRINT */ @@ -2330,102 +2331,152 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node ) } } + void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node ) { Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); - if (node == NULL) - return; - - if (doc->progressCallback) - { - doc->progressCallback( tidyImplToDoc(doc), node->line, node->column, doc->pprint.line + 1 ); - } + Node* next = NULL; - if ( node->type == TextNode) + while ( node ) { - PPrintText( doc, mode, indent, node ); - } - else if ( node->type == CommentTag ) - { - PCondFlushLineSmart( doc, indent ); - PPrintComment( doc, indent, node); - /* PCondFlushLine( doc, 0 ); */ - } - else if ( node->type == RootNode ) - { - Node *content; - for ( content = node->content; - content != NULL; - content = content->next ) - TY_(PPrintXMLTree)( doc, mode, indent, content ); - } - else if ( node->type == DocTypeTag ) - PPrintDocType( doc, indent, node ); - else if ( node->type == ProcInsTag ) - PPrintPI( doc, indent, node ); - else if ( node->type == XmlDecl ) - PPrintXmlDecl( doc, indent, node ); - else if ( node->type == CDATATag ) - PPrintCDATA( doc, indent, node ); - else if ( node->type == SectionTag ) - PPrintSection( doc, indent, node ); - else if ( node->type == AspTag ) - PPrintAsp( doc, indent, node ); - else if ( node->type == JsteTag) - PPrintJste( doc, indent, node ); - else if ( node->type == PhpTag) - PPrintPhp( doc, indent, node ); - else if ( TY_(nodeHasCM)(node, CM_EMPTY) || - (node->type == StartEndTag && !xhtmlOut) ) - { - PCondFlushLineSmart( doc, indent ); - PPrintTag( doc, mode, indent, node ); - /* TY_(PFlushLine)( doc, indent ); */ - } - else /* some kind of container element */ - { - uint spaces = cfg( doc, TidyIndentSpaces ); - Node *content; - Bool mixed = no; - uint cindent; - - for ( content = node->content; content; content = content->next ) + next = node->next; + + if (doc->progressCallback) { - if ( TY_(nodeIsText)(content) ) + doc->progressCallback( tidyImplToDoc(doc), node->line, node->column, doc->pprint.line + 1 ); + } + + if ( node->type == TextNode) + { + PPrintText( doc, mode, indent, node ); + } + else if ( node->type == RootNode ) + { + if (node->content) + node = node->content; + continue; + } + else if ( node->type == CommentTag ) + { + PCondFlushLineSmart( doc, indent ); + PPrintComment( doc, indent, node); + /* PCondFlushLine( doc, 0 ); */ + } + else if ( node->type == DocTypeTag ) + PPrintDocType( doc, indent, node ); + else if ( node->type == ProcInsTag ) + PPrintPI( doc, indent, node ); + else if ( node->type == XmlDecl ) + PPrintXmlDecl( doc, indent, node ); + else if ( node->type == CDATATag ) + PPrintCDATA( doc, indent, node ); + else if ( node->type == SectionTag ) + PPrintSection( doc, indent, node ); + else if ( node->type == AspTag ) + PPrintAsp( doc, indent, node ); + else if ( node->type == JsteTag) + PPrintJste( doc, indent, node ); + else if ( node->type == PhpTag) + PPrintPhp( doc, indent, node ); + else if ( TY_(nodeHasCM)(node, CM_EMPTY) || + (node->type == StartEndTag && !xhtmlOut) ) + { + PCondFlushLineSmart( doc, indent ); + PPrintTag( doc, mode, indent, node ); + /* TY_(PFlushLine)( doc, indent ); */ + } + else if ( node->type != RootNode ) /* some kind of container element */ + { + TidyParserMemory memory = {0}; + uint spaces = cfg( doc, TidyIndentSpaces ); + Node *content; + Bool mixed = no; + uint cindent; + + for ( content = node->content; content; content = content->next ) { - mixed = yes; - break; + if ( TY_(nodeIsText)(content) ) + { + mixed = yes; + break; + } } - } - PCondFlushLineSmart( doc, indent ); - - if ( TY_(XMLPreserveWhiteSpace)(doc, node) ) - { - indent = 0; - mixed = no; - cindent = 0; - } - else if (mixed) - cindent = indent; - else - cindent = indent + spaces; - - PPrintTag( doc, mode, indent, node ); - if ( !mixed && node->content ) - TY_(PFlushLineSmart)( doc, cindent ); - - for ( content = node->content; content; content = content->next ) - TY_(PPrintXMLTree)( doc, mode, cindent, content ); - - if ( !mixed && node->content ) PCondFlushLineSmart( doc, indent ); - PPrintEndTag( doc, mode, indent, node ); - /* PCondFlushLine( doc, indent ); */ - } + if ( TY_(XMLPreserveWhiteSpace)(doc, node) ) + { + indent = 0; + mixed = no; + cindent = 0; + } + else if (mixed) + cindent = indent; + else + cindent = indent + spaces; + + PPrintTag( doc, mode, indent, node ); + if ( !mixed && node->content ) + TY_(PFlushLineSmart)( doc, cindent ); + + memory.original_node = node; + memory.reentry_node = next; + memory.register_1 = mixed; + memory.register_2 = indent; + TY_(pushMemory)(doc, memory); + + /* Prevent infinite indentation. Seriously, at what point is + anyone going to read a file with infinite indentation? It + slows down rendering for arbitrarily-deep test cases that + are only meant to crash Tidy in the first place. Let's + consider whether to remove this limitation, lower it, + increase it, or add a new configuration option to control + it, or even emit an info-level message about it. + */ + if (indent < TIDY_INDENTATION_LIMIT * spaces) + indent = cindent; + + if (node->content) + { + node = node->content; + continue; + } + } + + if (next) + { + node = next; + continue; + } + + if ( TY_(isEmptyParserStack)(doc) == no ) + { + /* It's possible that the reentry_node is null, because we + only pushed this record as a marker for the end tag while + there was no next node. Thus the loop will pop until we have + what we need. This also closes multiple end tags. + */ + do { + TidyParserMemory memory = TY_(popMemory)(doc); + Node* close_node = memory.original_node; + Bool mixed = memory.register_1; + indent = memory.register_2; + + if ( !mixed && close_node->content ) + PCondFlushLineSmart( doc, indent ); + + PPrintEndTag( doc, mode, indent, close_node ); + /* PCondFlushLine( doc, indent ); */ + + node = memory.reentry_node; + } while ( node == NULL && TY_(isEmptyParserStack)(doc) == no ); + continue;; + } + node = NULL; + } /* while */ } + /* * local variables: * mode: c diff --git a/src/tidylib.c b/src/tidylib.c index 5907a96..48cf2cc 100644 --- a/src/tidylib.c +++ b/src/tidylib.c @@ -2048,16 +2048,24 @@ void dbg_show_node( TidyDocImpl* doc, Node *node, int caller, int indent ) SPRTF("\n"); } -void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent ) +/* Tail recursion here with sensible compilers will re-use + the stack frame and avoid overflows during debugging. + */ +void dbg_show_all_nodes_loop( TidyDocImpl* doc, Node *node, int indent ) { - while (node) + while ( node && (node = node->next) ) { dbg_show_node( doc, node, 0, indent ); - dbg_show_all_nodes( doc, node->content, indent + 1 ); - node = node->next; + dbg_show_all_nodes_loop( doc, node->content, indent + 1 ); } } +void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent ) +{ + dbg_show_node( doc, node, 0, indent ); + dbg_show_all_nodes_loop( doc, node->content, indent + 1 ); +} + #endif int tidyDocCleanAndRepair( TidyDocImpl* doc )