Merge pull request #988 from htacg/xml_recurse

The XML Parser and XML Pretty Printer are now non-recursive.
2021-08-17 07:32:07 -04:00 · 2021-08-17 07:32:07 -04:00 · 845e55e5d4
parent 71ff9a7a8a 132fb352b1
commit 845e55e5d4
19 changed files with 639 additions and 322 deletions
--- a/include/tidyplatform.h
+++ b/include/tidyplatform.h
@ -611,6 +611,10 @@ extern "C" {
 #  define TIDY_THREAD_LOCAL __thread
 #endif

+#ifndef TIDY_INDENTATION_LIMIT
+#  define TIDY_INDENTATION_LIMIT 50
+#endif
+
 typedef unsigned char byte;

 typedef uint tchar;         /* single, full character */
--- a/regression_testing/cases/dev-cases/case-005.conf
+++ b/regression_testing/cases/dev-cases/case-005.conf
@ -0,0 +1,5 @@
+# Config for test case.
+tidy-mark: no
+indent: yes
+wrap: 999
+input-xml: yes
--- a/regression_testing/cases/dev-cases/case-005@0.xml
+++ b/regression_testing/cases/dev-cases/case-005@0.xml
@ -0,0 +1,123 @@
+<!--
+This is a sample XML file.
+ -->
+<?xml version="1.0"?>
+<catalog>
+   <book id="bk101">
+      <author>Gambardella, Matthew</author>
+      <title>XML Developer's Guide</title>
+      <genre>Computer</genre>
+      <price>44.95</price>
+      <publish_date>2000-10-01</publish_date>
+      <description>An in-depth look at creating applications
+      with XML.</description>
+   </book>
+   <book id="bk102">
+      <author>Ralls, Kim</author>
+      <title>Midnight Rain</title>
+      <genre>Fantasy</genre>
+      <price>5.95</price>
+      <publish_date>2000-12-16</publish_date>
+      <description>A former architect battles corporate zombies,
+      an evil sorceress, and her own childhood to become queen
+      of the world.</description>
+   </book>
+   <book id="bk103">
+      <author>Corets, Eva</author>
+      <title>Maeve Ascendant</title>
+      <genre>Fantasy</genre>
+      <price>5.95</price>
+      <publish_date>2000-11-17</publish_date>
+      <description>After the collapse of a nanotechnology
+      society in England, the young survivors lay the
+      foundation for a new society.</description>
+   </book>
+   <book id="bk104">
+      <author>Corets, Eva</author>
+      <title>Oberon's Legacy</title>
+      <genre>Fantasy</genre>
+      <price>5.95</price>
+      <publish_date>2001-03-10</publish_date>
+      <description>In post-apocalypse England, the mysterious
+      agent known only as Oberon helps to create a new life
+      for the inhabitants of London. Sequel to Maeve
+      Ascendant.</description>
+   </book>
+   <book id="bk105">
+      <author>Corets, Eva</author>
+      <title>The Sundered Grail</title>
+      <genre>Fantasy</genre>
+      <price>5.95</price>
+      <publish_date>2001-09-10</publish_date>
+      <description>The two daughters of Maeve, half-sisters,
+      battle one another for control of England. Sequel to
+      Oberon's Legacy.</description>
+   </book>
+   <book id="bk106">
+      <author>Randall, Cynthia</author>
+      <title>Lover Birds</title>
+      <genre>Romance</genre>
+      <price>4.95</price>
+      <publish_date>2000-09-02</publish_date>
+      <description>When Carla meets Paul at an ornithology
+      conference, tempers fly as feathers get ruffled.</description>
+   </book>
+   <book id="bk107">
+      <author>Thurman, Paula</author>
+      <title>Splish Splash</title>
+      <genre>Romance</genre>
+      <price>4.95</price>
+      <publish_date>2000-11-02</publish_date>
+      <description>A deep sea diver finds true love twenty
+      thousand leagues beneath the sea.</description>
+   </book>
+   <book id="bk108">
+      <author>Knorr, Stefan</author>
+      <title>Creepy Crawlies</title>
+      <genre>Horror</genre>
+      <price>4.95</price>
+      <publish_date>2000-12-06</publish_date>
+      <description>An anthology of horror stories about roaches,
+      centipedes, scorpions  and other insects.</description>
+   </book>
+   <book id="bk109">
+      <author>Kress, Peter</author>
+      <title>Paradox Lost</title>
+      <genre>Science Fiction</genre>
+      <price>6.95</price>
+      <publish_date>2000-11-02</publish_date>
+      <description>After an inadvertant trip through a Heisenberg
+      Uncertainty Device, James Salway discovers the problems
+      of being quantum.</description>
+   </book>
+   <book id="bk110">
+      <author>O'Brien, Tim</author>
+      <title>Microsoft .NET: The Programming Bible</title>
+      <genre>Computer</genre>
+      <price>36.95</price>
+      <publish_date>2000-12-09</publish_date>
+      <description>Microsoft's .NET initiative is explored in
+      detail in this deep programmer's reference.</description>
+   </book>
+   <book id="bk111">
+      <author>O'Brien, Tim</author>
+      <title>MSXML3: A Comprehensive Guide</title>
+      <genre>Computer</genre>
+      <price>36.95</price>
+      <publish_date>2000-12-01</publish_date>
+      <description>The Microsoft MSXML3 parser is covered in
+      detail, with attention to XML DOM interfaces, XSLT processing,
+      SAX and more.</description>
+   </book>
+   <book id="bk112">
+      <author>Galos, Mike</author>
+      <title>Visual Studio 7: A Comprehensive Guide</title>
+      <genre>Computer</genre>
+      <price>49.95</price>
+      <publish_date>2001-04-16</publish_date>
+      <description>Microsoft Visual Studio 7 is explored in depth,
+      looking at how Visual Basic, Visual C++, C#, and ASP+ are
+      integrated into a comprehensive development
+      environment.</description>
+   </book>
+</catalog>
--- a/regression_testing/cases/legacy-expects/case-480406.txt
+++ b/regression_testing/cases/legacy-expects/case-480406.txt
--- a/regression_testing/cases/dev-expects/case-005.xml
+++ b/regression_testing/cases/dev-expects/case-005.xml
@ -0,0 +1,102 @@
+<?xml version="1.0"?>
+<!--
+This is a sample XML file.
+ -->
+<catalog>
+  <book id="bk101">
+    <author>Gambardella, Matthew</author>
+    <title>XML Developer's Guide</title>
+    <genre>Computer</genre>
+    <price>44.95</price>
+    <publish_date>2000-10-01</publish_date>
+    <description>An in-depth look at creating applications with XML.</description>
+  </book>
+  <book id="bk102">
+    <author>Ralls, Kim</author>
+    <title>Midnight Rain</title>
+    <genre>Fantasy</genre>
+    <price>5.95</price>
+    <publish_date>2000-12-16</publish_date>
+    <description>A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world.</description>
+  </book>
+  <book id="bk103">
+    <author>Corets, Eva</author>
+    <title>Maeve Ascendant</title>
+    <genre>Fantasy</genre>
+    <price>5.95</price>
+    <publish_date>2000-11-17</publish_date>
+    <description>After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society.</description>
+  </book>
+  <book id="bk104">
+    <author>Corets, Eva</author>
+    <title>Oberon's Legacy</title>
+    <genre>Fantasy</genre>
+    <price>5.95</price>
+    <publish_date>2001-03-10</publish_date>
+    <description>In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant.</description>
+  </book>
+  <book id="bk105">
+    <author>Corets, Eva</author>
+    <title>The Sundered Grail</title>
+    <genre>Fantasy</genre>
+    <price>5.95</price>
+    <publish_date>2001-09-10</publish_date>
+    <description>The two daughters of Maeve, half-sisters, battle one another for control of England. Sequel to Oberon's Legacy.</description>
+  </book>
+  <book id="bk106">
+    <author>Randall, Cynthia</author>
+    <title>Lover Birds</title>
+    <genre>Romance</genre>
+    <price>4.95</price>
+    <publish_date>2000-09-02</publish_date>
+    <description>When Carla meets Paul at an ornithology conference, tempers fly as feathers get ruffled.</description>
+  </book>
+  <book id="bk107">
+    <author>Thurman, Paula</author>
+    <title>Splish Splash</title>
+    <genre>Romance</genre>
+    <price>4.95</price>
+    <publish_date>2000-11-02</publish_date>
+    <description>A deep sea diver finds true love twenty thousand leagues beneath the sea.</description>
+  </book>
+  <book id="bk108">
+    <author>Knorr, Stefan</author>
+    <title>Creepy Crawlies</title>
+    <genre>Horror</genre>
+    <price>4.95</price>
+    <publish_date>2000-12-06</publish_date>
+    <description>An anthology of horror stories about roaches, centipedes, scorpions and other insects.</description>
+  </book>
+  <book id="bk109">
+    <author>Kress, Peter</author>
+    <title>Paradox Lost</title>
+    <genre>Science Fiction</genre>
+    <price>6.95</price>
+    <publish_date>2000-11-02</publish_date>
+    <description>After an inadvertant trip through a Heisenberg Uncertainty Device, James Salway discovers the problems of being quantum.</description>
+  </book>
+  <book id="bk110">
+    <author>O'Brien, Tim</author>
+    <title>Microsoft .NET: The Programming Bible</title>
+    <genre>Computer</genre>
+    <price>36.95</price>
+    <publish_date>2000-12-09</publish_date>
+    <description>Microsoft's .NET initiative is explored in detail in this deep programmer's reference.</description>
+  </book>
+  <book id="bk111">
+    <author>O'Brien, Tim</author>
+    <title>MSXML3: A Comprehensive Guide</title>
+    <genre>Computer</genre>
+    <price>36.95</price>
+    <publish_date>2000-12-01</publish_date>
+    <description>The Microsoft MSXML3 parser is covered in detail, with attention to XML DOM interfaces, XSLT processing, SAX and more.</description>
+  </book>
+  <book id="bk112">
+    <author>Galos, Mike</author>
+    <title>Visual Studio 7: A Comprehensive Guide</title>
+    <genre>Computer</genre>
+    <price>49.95</price>
+    <publish_date>2001-04-16</publish_date>
+    <description>Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment.</description>
+  </book>
+</catalog>
--- a/regression_testing/cases/legacy-cases/case-480406.conf
+++ b/regression_testing/cases/legacy-cases/case-480406.conf
@ -1,3 +0,0 @@
-// Tidy configuration file for bug #480406
-input-xml: yes
-output-xml: yes
--- a/regression_testing/cases/legacy-cases/case-480406@0.xml
+++ b/regression_testing/cases/legacy-cases/case-480406@0.xml
@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<!-- [ #480406 ] Single document element discarded - use "-xml" on command line -->
-<test />
-
--- a/regression_testing/cases/legacy-cases/case-634889.conf
+++ b/regression_testing/cases/legacy-cases/case-634889.conf
@ -1,10 +0,0 @@
-tidy-mark: no
-output-xml: yes 
-drop-proprietary-attributes: no 
-new-inline-tags: o:lock, o:p, v-f, v-formula, v-formulas,
-                 v-imagedata, v-path, v-shape, v-shapetype, v-stroke 
-new-empty-tags: 
-new-blocklevel-tags: 
-new-pre-tags: 
-wrap-sections: no 
-drop-empty-paras: no
--- a/regression_testing/cases/legacy-cases/case-634889@1.html
+++ b/regression_testing/cases/legacy-cases/case-634889@1.html
@ -1,9 +0,0 @@
-<html>
-<head>
-  <title>[ 634889 ] Problem with &lt;o:p&gt; ms word tag</title>
-</head>
-<body>
-  <p>Probably OK, now that ParseTagNames() is fixed.</p>
-  <o:p></o:p>
-</body>
-</html>
--- a/regression_testing/cases/legacy-cases/case-646946@0.xml
+++ b/regression_testing/cases/legacy-cases/case-646946@0.xml
@ -1,6 +0,0 @@
-<?xml version="1.0" standalone="yes"?> 
-<!-- [ 646946 ] Bad doctype guessing in XML mode -->
-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
-    "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd"> 
-<svg width="1800" height="1500"> 
-</svg> 
--- a/regression_testing/cases/legacy-expects/case-480406.xml
+++ b/regression_testing/cases/legacy-expects/case-480406.xml
@ -1,3 +0,0 @@
-<?xml version="1.0"?>
-<!-- [ #480406 ] Single document element discarded - use "-xml" on command line -->
-<test />
--- a/regression_testing/cases/legacy-expects/case-634889.html
+++ b/regression_testing/cases/legacy-expects/case-634889.html
@ -1,9 +0,0 @@
-<html>
-<head>
-<title>[ 634889 ] Problem with &lt;o:p&gt; ms word tag</title>
-</head>
-<body>
-<p>Probably OK, now that ParseTagNames() is fixed.</p>
-<o:p></o:p>
-</body>
-</html>
--- a/regression_testing/cases/legacy-expects/case-634889.txt
+++ b/regression_testing/cases/legacy-expects/case-634889.txt
@ -1,15 +0,0 @@
-line 1 column 1 - Warning: missing <!DOCTYPE> declaration
-line 7 column 3 - Warning: <o:p> is not approved by W3C
-Info: Document content looks like XHTML5
-Tidy found 2 warnings and 0 errors!
-
-About HTML Tidy: https://github.com/htacg/tidy-html5
-Bug reports and comments: https://github.com/htacg/tidy-html5/issues
-Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/
-Latest HTML specification: https://html.spec.whatwg.org/multipage/
-Validate your HTML documents: https://validator.w3.org/nu/
-Lobby your company to join the W3C: https://www.w3.org/Consortium
-
-Do you speak a language other than English, or a different variant of 
-English? Consider helping us to localize HTML Tidy. For details please see 
-https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md
--- a/regression_testing/cases/legacy-expects/case-646946.txt
+++ b/regression_testing/cases/legacy-expects/case-646946.txt
@ -1,12 +0,0 @@
-No warnings or errors were found.
-
-About HTML Tidy: https://github.com/htacg/tidy-html5
-Bug reports and comments: https://github.com/htacg/tidy-html5/issues
-Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/
-Latest HTML specification: https://html.spec.whatwg.org/multipage/
-Validate your HTML documents: https://validator.w3.org/nu/
-Lobby your company to join the W3C: https://www.w3.org/Consortium
-
-Do you speak a language other than English, or a different variant of 
-English? Consider helping us to localize HTML Tidy. For details please see 
-https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md
--- a/regression_testing/cases/legacy-expects/case-646946.xml
+++ b/regression_testing/cases/legacy-expects/case-646946.xml
@ -1,5 +0,0 @@
-<?xml version="1.0" standalone="yes"?>
-<!-- [ 646946 ] Bad doctype guessing in XML mode -->
-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
-"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
-<svg width="1800" height="1500"></svg>
--- a/src/parser.c
+++ b/src/parser.c
@ -28,6 +28,14 @@
 #define showingBodyOnly(doc) (cfgAutoBool(doc,TidyBodyOnly) == TidyYesState) ? yes : no


+/****************************************************************************//*
+ ** MARK: - Forward Declarations
+ ***************************************************************************/
+
+
+static Node* ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode);
+
+
 /****************************************************************************//*
 ** MARK: - Node Operations
 ***************************************************************************/
@ -858,7 +866,7 @@ static void growParserStack( TidyDocImpl* doc )
 /**
 *  Indicates whether or not the stack is empty.
 */
-static inline Bool isEmptyParserStack( TidyDocImpl* doc )
+Bool TY_(isEmptyParserStack)( TidyDocImpl* doc )
 {
    return doc->stack.top < 0;
 }
@ -867,7 +875,7 @@ static inline Bool isEmptyParserStack( TidyDocImpl* doc )
 /**
 *  Peek at the parser memory.
 */
-static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc )
+TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc )
 {
    return doc->stack.content[doc->stack.top];
 }
@ -877,7 +885,7 @@ static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc )
 *  Peek at the parser memory "identity" field. This is just a convenience
 *  to avoid having to create a new struct instance in the caller.
 */
-static inline Parser* peekMemoryIdentity( TidyDocImpl* doc )
+Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc )
 {
    return doc->stack.content[doc->stack.top].identity;
 }
@ -887,7 +895,7 @@ static inline Parser* peekMemoryIdentity( TidyDocImpl* doc )
 *  Peek at the parser memory "mode" field. This is just a convenience
 *  to avoid having to create a new struct instance in the caller.
 */
-static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc )
+GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc )
 {
    return doc->stack.content[doc->stack.top].mode;
 }
@ -896,12 +904,23 @@ static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc )
 /**
 *  Pop out a parser memory.
 */
-static TidyParserMemory popMemory( TidyDocImpl* doc )
+TidyParserMemory TY_(popMemory)( TidyDocImpl* doc )
 {
-    if ( !isEmptyParserStack( doc ) )
+    if ( !TY_(isEmptyParserStack)( doc ) )
    {
        TidyParserMemory data = doc->stack.content[doc->stack.top];
-        DEBUG_LOG(SPRTF("\n<--POP  %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top - 1 ));
+        DEBUG_LOG(SPRTF("\n"
+                        "<--POP  original: %s @ %p\n"
+                        "         reentry: %s @ %p\n"
+                        "     stack depth: %i @ %p\n"
+                        "      register 1: %i\n"
+                        "      register 2: %i\n\n",
+                        data.original_node ? data.original_node->element : "none", data.original_node,
+                        data.reentry_node ? data.reentry_node->element : "none", data.reentry_node,
+                        doc->stack.top, &doc->stack.content[doc->stack.top], /* top is a signed int (negative when empty), so %i */
+                        data.register_1,
+                        data.register_2
+                        ));
        doc->stack.top = doc->stack.top - 1;
        return data;
    }
@ -913,7 +932,7 @@ static TidyParserMemory popMemory( TidyDocImpl* doc )
 /**
 * Push the parser memory to the stack.
 */
-static void pushMemory( TidyDocImpl* doc, TidyParserMemory data )
+void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data )
 {
    if ( doc->stack.top == doc->stack.size - 1 )
        growParserStack( doc );
@ -921,7 +940,18 @@ static void pushMemory( TidyDocImpl* doc, TidyParserMemory data )
    doc->stack.top++;
    
    doc->stack.content[doc->stack.top] = data;
-    DEBUG_LOG(SPRTF("\n-->PUSH %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top ));
+    DEBUG_LOG(SPRTF("\n"
+                    "-->PUSH original: %s @ %p\n"
+                    "         reentry: %s @ %p\n"
+                    "     stack depth: %i @ %p\n"
+                    "      register 1: %i\n"
+                    "      register 2: %i\n\n",
+                    data.original_node ? data.original_node->element : "none", data.original_node,
+                    data.reentry_node ? data.reentry_node->element : "none", data.reentry_node,
+                    doc->stack.top, &doc->stack.content[doc->stack.top], /* top is a signed int (negative when empty), so %i */
+                    data.register_1,
+                    data.register_2
+                    ));
 }


@ -938,6 +968,9 @@ static Parser* GetParserForNode( TidyDocImpl* doc, Node *node )
 {
    Lexer* lexer = doc->lexer;

+    if ( cfgBool( doc, TidyXmlTags ) )
+        return ParseXMLElement;
+    
    /* [i_a]2 prevent crash for active content (php, asp) docs */
    if (!node || node->tag == NULL)
        return NULL;
@ -1008,9 +1041,9 @@ void ParseHTMLWithNode( TidyDocImpl* doc, Node* node )
         We weren't given a node, which means this particular leaf is bottomed
         out. We'll re-enter the parsers using information from the stack.
         */
-        if ( !isEmptyParserStack(doc))
+        if ( !TY_(isEmptyParserStack)(doc))
        {
-            parser = peekMemoryIdentity(doc);
+            parser = TY_(peekMemoryIdentity)(doc);
            if (parser)
            {
                continue;
@ -1018,8 +1051,8 @@ void ParseHTMLWithNode( TidyDocImpl* doc, Node* node )
            else
            {
                /* No parser means we're only passing back a parsing mode. */
-                mode = peekMemoryMode( doc );
-                popMemory( doc );
+                mode = TY_(peekMemoryMode)( doc );
+                TY_(popMemory)( doc );
            }
        }
        
@ -1065,7 +1098,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
    
    if ( element == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        node = memory.reentry_node; /* Throwaway, because the loop overwrites this immediately. */
        mode = memory.reentry_mode;
        element = memory.original_node;
@ -1563,7 +1596,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
                memory.reentry_node = node;
                memory.reentry_mode = mode;
                memory.original_node = element;
-                pushMemory(doc, memory);
+                TY_(pushMemory)(doc, memory);
                DEBUG_LOG(SPRTF("<<<Leave ParseBlock to return node %s\n", node->element));
            }
            return node;
@ -1621,11 +1654,11 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
     */
    if ( body == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        body = memory.original_node;
-        checkstack = memory.register_b_1;
-        iswhitenode = memory.register_b_2;
+        checkstack = memory.register_1;
+        iswhitenode = memory.register_2;
        mode = memory.mode;
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseBody with %s\n", node->element));
    }
@ -1691,10 +1724,10 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
                memory.identity = TY_(ParseBody);
                memory.original_node = body;
                memory.reentry_node = node;
-                memory.register_b_1 = checkstack;
-                memory.register_b_2 = iswhitenode;
+                memory.register_1 = checkstack;
+                memory.register_2 = iswhitenode;
                memory.mode = mode;
-                pushMemory( doc, memory );
+                TY_(pushMemory)( doc, memory );
                return node;
            }

@ -1907,10 +1940,10 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
                memory.identity = TY_(ParseBody);
                memory.original_node = body;
                memory.reentry_node = node;
-                memory.register_b_1 = checkstack;
-                memory.register_b_2 = iswhitenode;
+                memory.register_1 = checkstack;
+                memory.register_2 = iswhitenode;
                memory.mode = mode;
-                pushMemory( doc, memory );
+                TY_(pushMemory)( doc, memory );
            }
            DEBUG_LOG(SPRTF("<<<Exiting ParseBody with a node to parse: %s\n", node->element));
            return node;
@ -1944,7 +1977,7 @@ Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNU
     */
    if ( colgroup == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        colgroup = memory.original_node;
        mode = memory.mode;
@ -2034,7 +2067,7 @@ Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNU
            memory.original_node = colgroup;
            memory.reentry_node = node;
            memory.mode = mode;
-            pushMemory( doc, memory );
+            TY_(pushMemory)( doc, memory );
        }
        DEBUG_LOG(SPRTF("<<<Exiting ParseColGroup with a node to parse: %s\n", node->element));
        return node;
@ -2061,7 +2094,7 @@ Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED

    if ( field == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        field = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseDataList with %s\n", node->element));
@ -2103,7 +2136,7 @@ Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
            memory.reentry_mode = IgnoreWhitespace;

            TY_(InsertNodeAtEnd)(field, node);
-            pushMemory(doc, memory);
+            TY_(pushMemory)(doc, memory);
            return node;
        }

@ -2144,7 +2177,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )

    if ( list == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        list = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        state = memory.reentry_state;
@ -2272,7 +2305,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )
                        memory.original_node = list;
                        memory.reentry_node = node;
                        memory.reentry_state = STATE_POST_NODEISCENTER;
-                        pushMemory( doc, memory );
+                        TY_(pushMemory)( doc, memory );
                        DEBUG_LOG(SPRTF("<<<Exiting ParseDefList 3 with a node to parse: %s\n", node->element));
                        return node;
                    }
@ -2315,7 +2348,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )
                    memory.original_node = list;
                    memory.reentry_node = node;
                    memory.reentry_state = STATE_INITIAL;
-                    pushMemory( doc, memory );
+                    TY_(pushMemory)( doc, memory );
                    DEBUG_LOG(SPRTF("<<<Exiting ParseDefList 4 with a node to parse: %s\n", node->element));
                    return node;
                }
@ -2402,7 +2435,7 @@ Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNU
     */
    if ( frameset == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        node = memory.reentry_node; /* Throwaway, because we replace it entering the loop. */
        frameset = memory.original_node;
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseFrameSet with %s\n", node->element));
@ -2468,7 +2501,7 @@ Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNU
            memory.original_node = frameset;
            memory.reentry_node = node;
            memory.mode = MixedContent;
-            pushMemory( doc, memory );
+            TY_(pushMemory)( doc, memory );
            DEBUG_LOG(SPRTF("<<<Exiting ParseFrameSet with a node to parse: %s\n", node->element));
            return node;
        }
@ -2509,11 +2542,11 @@ Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode

    if ( head == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        head = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
-        HasTitle = memory.register_b_1;
-        HasBase = memory.register_b_2;
+        HasTitle = memory.register_1;
+        HasBase = memory.register_2;
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseHead with %s\n", node->element));
    }
    else
@ -2622,9 +2655,9 @@ Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode
                memory.identity = TY_(ParseHead);
                memory.original_node = head;
                memory.reentry_node = node;
-                memory.register_b_1 = HasTitle;
-                memory.register_b_2 = HasBase;
-                pushMemory( doc, memory );
+                memory.register_1 = HasTitle;
+                memory.register_2 = HasBase;
+                TY_(pushMemory)( doc, memory );
                DEBUG_LOG(SPRTF("<<<Exiting ParseHead with a node to parse: %s\n", node->element));
                return node;
            }
@ -2684,7 +2717,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
     */
    if ( html == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        node = memory.reentry_node;
        mode = memory.reentry_mode;
        state = memory.reentry_state;
@ -2956,7 +2989,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
                memory.reentry_mode = mode;
                memory.reentry_state = STATE_PARSE_HEAD_REENTER;
                TY_(InsertNodeAtEnd)(html, node);
-                pushMemory( doc, memory );
+                TY_(pushMemory)( doc, memory );
                DEBUG_LOG(SPRTF("<<<Exiting ParseHTML at STATE_PARSE_HEAD, count: %d, depth %d\n", parser_count, --parser_depth));
                return node;
            } break;
@ -2981,7 +3014,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
                memory.reentry_mode = mode;
                memory.reentry_state = STATE_COMPLETE;
                TY_(InsertNodeAtEnd)(html, node);
-                pushMemory( doc, memory );
+                TY_(pushMemory)( doc, memory );
                DEBUG_LOG(SPRTF("<<<Exiting ParseHTML at STATE_PARSE_BODY, count: %d, depth %d\n", parser_count, --parser_depth));
                return node;
            } break;
@ -3000,7 +3033,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
                memory.reentry_node = frameset;
                memory.reentry_mode = mode;
                memory.reentry_state = STATE_PARSE_NOFRAMES_REENTER;
-                pushMemory( doc, memory );
+                TY_(pushMemory)( doc, memory );
                DEBUG_LOG(SPRTF("<<<Exiting ParseHTML at STATE_PARSE_NOFRAMES, count: %d, depth %d\n", parser_count, --parser_depth));
                return noframes;
            } break;
@ -3026,7 +3059,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
                memory.reentry_mode = mode;
                memory.reentry_state = STATE_PARSE_FRAMESET_REENTER;
                TY_(InsertNodeAtEnd)(html, node);
-                pushMemory( doc, memory );
+                TY_(pushMemory)( doc, memory );
                DEBUG_LOG(SPRTF("<<<Exiting ParseHTML at STATE_PARSE_FRAMESET, count: %d, depth %d\n", parser_count, --parser_depth));
                return node;
            } break;
@ -3081,7 +3114,7 @@ Node* TY_(ParseInline)( TidyDocImpl *doc, Node *element, GetTokenMode mode )

    if ( element == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        element = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        mode = memory.reentry_mode;
@ -3651,7 +3684,7 @@ Node* TY_(ParseInline)( TidyDocImpl *doc, Node *element, GetTokenMode mode )
                memory.reentry_node = node;
                memory.mode = mode;
                memory.reentry_mode = mode;
-                pushMemory( doc, memory );
+                TY_(pushMemory)( doc, memory );
                DEBUG_LOG(SPRTF("<<<Exiting ParseInline 1 with a node to parse: %s\n", node->element));
                return node;
            }
@ -3693,7 +3726,7 @@ Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode

    if ( list == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        list = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseList with %s\n", node->element));
@ -3859,7 +3892,7 @@ Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode
            memory.original_node = list;
            memory.reentry_node = node;
            memory.mode = IgnoreWhitespace;
-            pushMemory( doc, memory );
+            TY_(pushMemory)( doc, memory );
            DEBUG_LOG(SPRTF("<<<Exiting ParseList with a node to parse: %s\n", node->element));
            return node;
        }
@ -4041,11 +4074,11 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
     */
    if ( noframes == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        node = memory.reentry_node; /* Throwaway, because we replace it entering the loop anyway.*/
        noframes = memory.original_node;
        state = memory.reentry_state;
-        body_seen = memory.register_b_1;
+        body_seen = memory.register_1;
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseNoFrames with %s\n", node->element));
    }
    else
@ -4123,11 +4156,11 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
                    memory.original_node = noframes;
                    memory.reentry_node = node;
                    memory.reentry_state = STATE_POST_NODEISBODY;
-                    memory.register_b_1 = lexer->seenEndBody;
+                    memory.register_1 = lexer->seenEndBody;
                    memory.mode = IgnoreWhitespace;

                    TY_(InsertNodeAtEnd)(noframes, node);
-                    pushMemory( doc, memory );
+                    TY_(pushMemory)( doc, memory );
                    DEBUG_LOG(SPRTF("<<<Exiting ParseNoFrames with a node to parse: %s\n", node->element));
                    return node;
                }
@ -4168,7 +4201,7 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
                        memory.reentry_node = node;
                        memory.mode = IgnoreWhitespace; /*MixedContent*/
                        memory.reentry_state = STATE_INITIAL;
-                        pushMemory( doc, memory );
+                        TY_(pushMemory)( doc, memory );
                        DEBUG_LOG(SPRTF("<<<Exiting ParseNoFrames with a node to parse: %s\n", node->element));
                        return node;
                    }
@ -4220,7 +4253,7 @@ Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED

    if ( field == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        field = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseOptGroup with %s\n", node->element));
@ -4259,7 +4292,7 @@ Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
            memory.identity = TY_(ParseOptGroup);
            memory.original_node = field;
            memory.reentry_node = node;
-            pushMemory( doc, memory );
+            TY_(pushMemory)( doc, memory );
            DEBUG_LOG(SPRTF("<<<Exiting ParseOptGroup with a node to parse: %s\n", node->element));
            return node;
        }
@ -4293,7 +4326,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)

    if ( pre == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        pre = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        state = memory.reentry_state;
@ -4446,7 +4479,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
                        memory.original_node = pre;
                        memory.reentry_node = node;
                        memory.reentry_state = STATE_RENTRY_ACTION;
-                        pushMemory( doc, memory );
+                        TY_(pushMemory)( doc, memory );
                        DEBUG_LOG(SPRTF("<<<Exiting ParsePre with a node to parse: %s\n", node->element));
                        return node;
                    }
@ -4488,7 +4521,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
                        memory.original_node = pre;
                        memory.reentry_node = node;
                        memory.reentry_state = STATE_INITIAL;
-                        pushMemory( doc, memory );
+                        TY_(pushMemory)( doc, memory );
                        DEBUG_LOG(SPRTF("<<<Exiting ParsePre with a node to parse: %s\n", node->element));
                        return node;
                    }
@ -4548,11 +4581,11 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)

    if ( row == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        row = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        state = memory.reentry_state;
-        exclude_state = memory.register_b_1;
+        exclude_state = memory.register_1;
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseRow with %s\n", node->element));
    }
    else
@ -4692,8 +4725,8 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)
                            memory.original_node = row;
                            memory.reentry_node = node;
                            memory.reentry_state = STATE_POST_NOT_ENDTAG;
-                            memory.register_b_1 = exclude_state;
-                            pushMemory( doc, memory );
+                            memory.register_1 = exclude_state;
+                            TY_(pushMemory)( doc, memory );
                            DEBUG_LOG(SPRTF("<<<Exiting ParseRow 1 with a node to parse: %s\n", node->element));
                            return node;
                        }
@ -4727,8 +4760,8 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)
                    memory.original_node = row;
                    memory.reentry_node = node;
                    memory.reentry_state = STATE_POST_TD_TH;
-                    memory.register_b_1 = exclude_state;
-                    pushMemory( doc, memory );
+                    memory.register_1 = exclude_state;
+                    TY_(pushMemory)( doc, memory );
                    DEBUG_LOG(SPRTF("<<<Exiting ParseRow 2 with a node to parse: %s\n", node->element));
                    return node;
                }
@ -4792,7 +4825,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU

    if ( rowgroup == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        rowgroup = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        state = memory.reentry_state;
@ -4887,7 +4920,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU
                            memory.original_node = rowgroup;
                            memory.reentry_node = node;
                            memory.reentry_state = STATE_POST_NOT_TEXTNODE;
-                            pushMemory( doc, memory );
+                            TY_(pushMemory)( doc, memory );
                            DEBUG_LOG(SPRTF("<<<Exiting ParseRowGroup 1 with a node to parse: %s\n", node->element));
                            return node;
                        }
@ -4973,7 +5006,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU
                memory.original_node = rowgroup;
                memory.reentry_node = node;
                memory.reentry_state = STATE_INITIAL;
-                pushMemory( doc, memory );
+                TY_(pushMemory)( doc, memory );
                DEBUG_LOG(SPRTF("<<<Exiting ParseRowGroup 2 with a node to parse: %s\n", node->element));
                return node;
            } break;
@ -5067,7 +5100,7 @@ Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m

    if ( field == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        field = memory.original_node;
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseSelect with %s\n", node->element));
@ -5108,7 +5141,7 @@ Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
            memory.reentry_node = node;

            TY_(InsertNodeAtEnd)(field, node);
-            pushMemory( doc, memory );
+            TY_(pushMemory)( doc, memory );
            DEBUG_LOG(SPRTF("<<<Exiting ParseSelect with a node to parse: %s\n", node->element));
            return node;
        }
@ -5144,10 +5177,10 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED

    if ( table == NULL )
    {
-        TidyParserMemory memory = popMemory( doc );
+        TidyParserMemory memory = TY_(popMemory)( doc );
        node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
        table = memory.original_node;
-        lexer->exiled = memory.register_b_1;
+        lexer->exiled = memory.register_1;
        DEBUG_LOG(SPRTF(">>>Re-Enter ParseTableTag with %s\n", node->element));
    }
    else
@ -5219,9 +5252,9 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED
                    memory.identity = TY_(ParseTableTag);
                    memory.original_node = table;
                    memory.reentry_node = node;
-                    memory.register_b_1 = no; /* later, lexer->exiled = no */
+                    memory.register_1 = no; /* later, lexer->exiled = no */
                    memory.mode = IgnoreWhitespace;
-                    pushMemory( doc, memory );
+                    TY_(pushMemory)( doc, memory );
                    DEBUG_LOG(SPRTF("<<<Exiting ParseTableTag with a node to parse: %s\n", node->element));
                    return node;
                }
@ -5292,8 +5325,8 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED
            memory.identity = TY_(ParseTableTag);
            memory.original_node = table;
            memory.reentry_node = node;
-            memory.register_b_1 = lexer->exiled;
-            pushMemory( doc, memory );
+            memory.register_1 = lexer->exiled;
+            TY_(pushMemory)( doc, memory );
            DEBUG_LOG(SPRTF("<<<Exiting ParseTableTag with a node to parse: %s\n", node->element));
            return node;
        }
@ -5457,6 +5490,116 @@ Node* TY_(ParseTitle)( TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mo
 }


+/** MARK: ParseXMLElement
+ *  Parses the content of an XML element without recursing. On a child
+ *  start tag, the parser state is pushed onto the parser memory stack and
+ *  the child is returned; a later call with element == NULL pops that
+ *  state, appends the child to the element, and resumes.
+ *  @param element The element to parse, or NULL to resume from the stack.
+ *  @param mode    Token mode for a fresh call; restored from the stack on resume.
+ *  @returns The child node to parse next, or NULL when nothing remains.
+ */
+static Node* ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
+{
+    Lexer* lexer = doc->lexer;
+    Node *node;
+
+    if ( element == NULL )
+    {
+        TidyParserMemory memory = TY_(popMemory)( doc );
+        element = memory.original_node;
+        node = memory.reentry_node; /* The child that was handed back to the caller. */
+        mode = memory.reentry_mode;
+        TY_(InsertNodeAtEnd)(element, node); /* The only re-entry action needed. */
+    }
+    else
+    {
+        /* if node is pre or has xml:space="preserve" then do so */
+        if ( TY_(XMLPreserveWhiteSpace)(doc, element) )
+            mode = Preformatted;
+
+        /* deal with comments etc. */
+        InsertMisc( &doc->root, element);
+
+        /* we shouldn't have plain text at this point. */
+        if (TY_(nodeIsText)(element))
+        {
+            TY_(Report)(doc, &doc->root, element, DISCARDING_UNEXPECTED);
+            TY_(FreeNode)( doc, element);
+            return NULL;
+        }
+    }
+    while ((node = TY_(GetToken)(doc, mode)) != NULL)
+    {
+        if (node->type == EndTag &&
+           node->element && element->element &&
+           TY_(tmbstrcmp)(node->element, element->element) == 0)
+        {
+            TY_(FreeNode)( doc, node);
+            element->closed = yes;
+            break;
+        }
+
+        /* discard unexpected end tags; element is never NULL at this point */
+        if (node->type == EndTag)
+        {
+            TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_IN);
+
+            TY_(FreeNode)( doc, node);
+            continue;
+        }
+
+        /* parse content on seeing start tag */
+        if (node->type == StartTag)
+        {
+            TidyParserMemory memory = {0};
+            memory.identity = ParseXMLElement;
+            memory.original_node = element;
+            memory.reentry_node = node;
+            memory.mode = mode; /* the caller peeks this to parse the child */
+            memory.reentry_mode = mode;
+            TY_(pushMemory)( doc, memory );
+            return node;
+        }
+
+        TY_(InsertNodeAtEnd)(element, node);
+    } /* while */
+
+    /* if first child is text then trim initial space and
+       delete text node if it is empty. */
+
+    node = element->content;
+
+    if (TY_(nodeIsText)(node) && mode != Preformatted)
+    {
+        if ( lexer->lexbuf[node->start] == ' ' )
+        {
+            node->start++;
+
+            if (node->start >= node->end)
+                TY_(DiscardElement)( doc, node );
+        }
+    }
+
+    /* if last child is text then trim final space and
+       delete the text node if it is empty */
+
+    node = element->last;
+
+    if (TY_(nodeIsText)(node) && mode != Preformatted)
+    {
+        if ( lexer->lexbuf[node->end - 1] == ' ' )
+        {
+            node->end--;
+
+            if (node->start >= node->end)
+                TY_(DiscardElement)( doc, node );
+        }
+    }
+    return NULL;
+}
+
+
 /***************************************************************************//*
 ** MARK: - Post-Parse Operations
 ***************************************************************************/
@ -6101,87 +6244,6 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
 }


-/** MARK: TY_(ParseXMLElement)
- *  Parses the given XML element.
- */
-static void ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
-{
-    Lexer* lexer = doc->lexer;
-    Node *node;
-
-    /* if node is pre or has xml:space="preserve" then do so */
-
-    if ( TY_(XMLPreserveWhiteSpace)(doc, element) )
-        mode = Preformatted;
-
-    while ((node = TY_(GetToken)(doc, mode)) != NULL)
-    {
-        if (node->type == EndTag &&
-           node->element && element->element &&
-           TY_(tmbstrcmp)(node->element, element->element) == 0)
-        {
-            TY_(FreeNode)( doc, node);
-            element->closed = yes;
-            break;
-        }
-
-        /* discard unexpected end tags */
-        if (node->type == EndTag)
-        {
-            if (element)
-                TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_IN);
-            else
-                TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_ERR);
-
-            TY_(FreeNode)( doc, node);
-            continue;
-        }
-
-        /* parse content on seeing start tag */
-        if (node->type == StartTag)
-            ParseXMLElement( doc, node, mode );
-
-        TY_(InsertNodeAtEnd)(element, node);
-    }
-
-    /*
-     if first child is text then trim initial space and
-     delete text node if it is empty.
-    */
-
-    node = element->content;
-
-    if (TY_(nodeIsText)(node) && mode != Preformatted)
-    {
-        if ( lexer->lexbuf[node->start] == ' ' )
-        {
-            node->start++;
-
-            if (node->start >= node->end)
-                TY_(DiscardElement)( doc, node );
-        }
-    }
-
-    /*
-     if last child is text then trim final space and
-     delete the text node if it is empty
-    */
-
-    node = element->last;
-
-    if (TY_(nodeIsText)(node) && mode != Preformatted)
-    {
-        if ( lexer->lexbuf[node->end - 1] == ' ' )
-        {
-            node->end--;
-
-            if (node->start >= node->end)
-                TY_(DiscardElement)( doc, node );
-        }
-    }
-}
-
-
 /** MARK: TY_(ParseXMLDocument)
 *  Parses the document using Tidy's XML parser.
 */
@ -6232,7 +6294,7 @@ void TY_(ParseXMLDocument)(TidyDocImpl* doc)
        if (node->type == StartTag)
        {
            TY_(InsertNodeAtEnd)( &doc->root, node );
-            ParseXMLElement( doc, node, IgnoreWhitespace );
+            ParseHTMLWithNode( doc, node );
            continue;
        }

--- a/src/parser.h
+++ b/src/parser.h
@ -55,8 +55,8 @@ typedef struct _TidyParserMemory
    GetTokenMode reentry_mode;   /**< The token mode to use when re-entering. */
    int          reentry_state;  /**< State to set during re-entry. Defined locally in each parser. */
    GetTokenMode mode;           /**< The caller will peek at this value to get the correct mode. */
-    Bool         register_b_1;   /**< Local variable storage. */
-    Bool         register_b_2;   /**< Local variable storage. */
+    int          register_1;     /**< Local variable storage. */
+    int          register_2;   /**< Local variable storage. */
 } TidyParserMemory;


@ -86,6 +86,44 @@ void TY_(InitParserStack)( TidyDocImpl* doc );
 void TY_(FreeParserStack)( TidyDocImpl* doc );


+/**
+ *  Indicates whether or not the parser memory stack of `doc` is empty.
+ */
+Bool TY_(isEmptyParserStack)( TidyDocImpl* doc );
+
+
+/**
+ *  Peek at the parser memory; the record is returned by value.
+ */
+TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc );
+
+
+/**
+ *  Peek at the parser memory "identity" field. This is just a convenience
+ *  to avoid having to create a new struct instance in the caller.
+ */
+Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc );
+
+
+/**
+ *  Peek at the parser memory "mode" field. This is just a convenience
+ *  to avoid having to create a new struct instance in the caller.
+ */
+GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc );
+
+
+/**
+ *  Pop a parser memory off the stack; the record is returned by value.
+ */
+TidyParserMemory TY_(popMemory)( TidyDocImpl* doc );
+
+
+/**
+ *  Push the parser memory `data` (passed by value) onto the stack.
+ */
+void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data );
+
+
 /**
 *  Is used to perform a node integrity check recursively after parsing
 *  an HTML or XML document.
--- a/src/pprint.c
+++ b/src/pprint.c
@ -16,6 +16,7 @@
 #include "entities.h"
 #include "tmbstr.h"
 #include "utf8.h"
+#include "sprtf.h"

 /* *** FOR DEBUG ONLY *** */
 /* #define DEBUG_PPRINT */
@ -2330,102 +2331,152 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
    }
 }

+
+/* Pretty prints `node`, its following siblings and all descendants as XML.
+   Iterative: each open container is pushed on the parser memory stack with
+   its indents and mixed-content flag, then popped to emit its end tag.
+ */
 void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
 {
    Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
-    if (node == NULL)
-        return;
+    Node* next = NULL;
    
-    if (doc->progressCallback)
+    while ( node )
    {
-        doc->progressCallback( tidyImplToDoc(doc), node->line, node->column, doc->pprint.line + 1 );
-    }
+        next = node->next;
        
-    if ( node->type == TextNode)
-    {
-        PPrintText( doc, mode, indent, node );
-    }
-    else if ( node->type == CommentTag )
-    {
-        PCondFlushLineSmart( doc, indent );
-        PPrintComment( doc, indent, node);
-        /* PCondFlushLine( doc, 0 ); */
-    }
-    else if ( node->type == RootNode )
-    {
-        Node *content;
-        for ( content = node->content;
-              content != NULL;
-              content = content->next )
-           TY_(PPrintXMLTree)( doc, mode, indent, content );
-    }
-    else if ( node->type == DocTypeTag )
-        PPrintDocType( doc, indent, node );
-    else if ( node->type == ProcInsTag )
-        PPrintPI( doc, indent, node );
-    else if ( node->type == XmlDecl )
-        PPrintXmlDecl( doc, indent, node );
-    else if ( node->type == CDATATag )
-        PPrintCDATA( doc, indent, node );
-    else if ( node->type == SectionTag )
-        PPrintSection( doc, indent, node );
-    else if ( node->type == AspTag )
-        PPrintAsp( doc, indent, node );
-    else if ( node->type == JsteTag)
-        PPrintJste( doc, indent, node );
-    else if ( node->type == PhpTag)
-        PPrintPhp( doc, indent, node );
-    else if ( TY_(nodeHasCM)(node, CM_EMPTY) ||
-              (node->type == StartEndTag && !xhtmlOut) )
-    {
-        PCondFlushLineSmart( doc, indent );
-        PPrintTag( doc, mode, indent, node );
-        /* TY_(PFlushLine)( doc, indent ); */
-    }
-    else /* some kind of container element */
-    {
-        uint spaces = cfg( doc, TidyIndentSpaces );
-        Node *content;
-        Bool mixed = no;
-        uint cindent;
-
-        for ( content = node->content; content; content = content->next )
+        if (doc->progressCallback)
        {
-            if ( TY_(nodeIsText)(content) )
+            doc->progressCallback( tidyImplToDoc(doc), node->line, node->column, doc->pprint.line + 1 );
+        }
+
+        if ( node->type == TextNode)
+        {
+            PPrintText( doc, mode, indent, node );
+        }
+        else if ( node->type == RootNode )
+        {
+            /* Descend into the root; an empty root ends the loop. */
+            node = node->content;
+            continue;
+        }
+        else if ( node->type == CommentTag )
+        {
+            PCondFlushLineSmart( doc, indent );
+            PPrintComment( doc, indent, node);
+        }
+        else if ( node->type == DocTypeTag )
+            PPrintDocType( doc, indent, node );
+        else if ( node->type == ProcInsTag )
+            PPrintPI( doc, indent, node );
+        else if ( node->type == XmlDecl )
+            PPrintXmlDecl( doc, indent, node );
+        else if ( node->type == CDATATag )
+            PPrintCDATA( doc, indent, node );
+        else if ( node->type == SectionTag )
+            PPrintSection( doc, indent, node );
+        else if ( node->type == AspTag )
+            PPrintAsp( doc, indent, node );
+        else if ( node->type == JsteTag)
+            PPrintJste( doc, indent, node );
+        else if ( node->type == PhpTag)
+            PPrintPhp( doc, indent, node );
+        else if ( TY_(nodeHasCM)(node, CM_EMPTY) ||
+                  (node->type == StartEndTag && !xhtmlOut) )
+        {
+            PCondFlushLineSmart( doc, indent );
+            PPrintTag( doc, mode, indent, node );
+        }
+        else /* some kind of container element */
+        {
+            TidyParserMemory memory = {0};
+            uint spaces = cfg( doc, TidyIndentSpaces );
+            Node *content;
+            Bool mixed = no;
+            uint cindent;
+
+            for ( content = node->content; content; content = content->next )
            {
-                mixed = yes;
-                break;
+                if ( TY_(nodeIsText)(content) )
+                {
+                    mixed = yes;
+                    break;
+                }
+            }
+
+            PCondFlushLineSmart( doc, indent );
+            memory.reentry_state = indent; /* sibling indent, saved before any xml:space reset */
+
+            if ( TY_(XMLPreserveWhiteSpace)(doc, node) )
+            {
+                indent = 0;
+                mixed = no;
+                cindent = 0;
+            }
+            else if (mixed)
+                cindent = indent;
+            else
+                cindent = indent + spaces;
+
+            PPrintTag( doc, mode, indent, node );
+            if ( !mixed && node->content )
+                TY_(PFlushLineSmart)( doc, cindent );
+
+            memory.original_node = node;
+            memory.reentry_node = next;
+            memory.register_1 = mixed;
+            memory.register_2 = indent;
+            TY_(pushMemory)(doc, memory);
+
+            /* Cap the indent depth (TIDY_INDENTATION_LIMIT levels) so that
+               arbitrarily deep input stays cheap to render and readable. */
+            if (indent < TIDY_INDENTATION_LIMIT * spaces)
+                indent = cindent;
+
+            if (node->content)
+            {
+                node = node->content;
+                continue;
            }
+            /* Empty container: skip the sibling shortcut below so the pop
+               emits this end tag before resuming at the saved sibling. */
+            next = NULL;
        }
        
-        PCondFlushLineSmart( doc, indent );
-
-        if ( TY_(XMLPreserveWhiteSpace)(doc, node) )
+        if (next)
        {
-            indent = 0;
-            mixed = no;
-            cindent = 0;
+            node = next;
+            continue;
        }
-        else if (mixed)
-            cindent = indent;
-        else
-            cindent = indent + spaces;
        
-        PPrintTag( doc, mode, indent, node );
-        if ( !mixed && node->content )
-            TY_(PFlushLineSmart)( doc, cindent );
+        if ( TY_(isEmptyParserStack)(doc) == no )
+        {
+            /* It's possible that the reentry_node is null, because we
+               only pushed this record as a marker for the end tag while
+               there was no next node. Thus the loop will pop until we have
+               what we need. This also closes multiple end tags.
+             */
+            do {
+                TidyParserMemory memory = TY_(popMemory)(doc);
+                Node* close_node = memory.original_node;
+                Bool mixed = memory.register_1;
+                indent = memory.register_2;
                
-        for ( content = node->content; content; content = content->next )
-            TY_(PPrintXMLTree)( doc, mode, cindent, content );
+                if ( !mixed && close_node->content )
+                    PCondFlushLineSmart( doc, indent );

-        if ( !mixed && node->content )
-            PCondFlushLineSmart( doc, indent );
+                PPrintEndTag( doc, mode, indent, close_node );
+                indent = memory.reentry_state; /* indent for the siblings that follow */
           
-        PPrintEndTag( doc, mode, indent, node );
-        /* PCondFlushLine( doc, indent ); */
-    }
+                node = memory.reentry_node;
+            } while ( node == NULL && TY_(isEmptyParserStack)(doc) == no );
+            continue;
+        }
+        node = NULL;
+    } /* while */
 }

+
 /*
 * local variables:
 * mode: c
--- a/src/tidylib.c
+++ b/src/tidylib.c
@ -2048,16 +2048,24 @@ void dbg_show_node( TidyDocImpl* doc, Node *node, int caller, int indent )
    SPRTF("\n");
 }

-void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent )
+/* Debug helper: shows `node`, each of its following siblings, and all of
+   their descendants, increasing `indent` by one per level of depth.
+ */
+void dbg_show_all_nodes_loop( TidyDocImpl* doc, Node *node, int indent )
 {
-    while (node)
+    for ( ; node; node = node->next )
    {
        dbg_show_node( doc, node, 0, indent );
-        dbg_show_all_nodes( doc, node->content, indent + 1 );
-        node = node->next;
+        dbg_show_all_nodes_loop( doc, node->content, indent + 1 );
    }
 }

+/* Public entry point; shows nothing when `node` is NULL. */
+void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent )
+{
+    dbg_show_all_nodes_loop( doc, node, indent );
+}
+
 #endif

 int         tidyDocCleanAndRepair( TidyDocImpl* doc )