diff --git a/src/clean.c b/src/clean.c
index 5e2b936..e314ba6 100644
--- a/src/clean.c
+++ b/src/clean.c
@@ -1585,11 +1585,16 @@ void TY_(List2BQ)( TidyDocImpl* doc, Node* node )
 */
 void TY_(BQ2Div)( TidyDocImpl* doc, Node *node )
 {
+    Stack *stack = TY_(newStack)(doc, 16);  /* siblings to resume at after a subtree */
+    Node *next;
+
     tmbchar indent_buf[ 32 ];
     uint indent;
 
     while (node)
     {
+        next = node->next;  /* remembered before descending into node->content */
+
         if ( nodeIsBLOCKQUOTE(node) && node->implicit )
         {
             indent = 1;
@@ -1602,19 +1607,27 @@ void TY_(BQ2Div)( TidyDocImpl* doc, Node *node )
                 StripOnlyChild( doc, node );
             }
 
-            if (node->content)
-                TY_(BQ2Div)( doc, node->content );
-
             TY_(tmbsnprintf)(indent_buf, sizeof(indent_buf), "margin-left: %dem",
                              2*indent);
 
             RenameElem( doc, node, TidyTag_DIV );
             TY_(AddStyleProperty)(doc, node, indent_buf );
+            if (node->content)
+            {
+                TY_(push)(stack, next);  /* visit the children first, then this sibling */
+                node = node->content;
+                continue;
+            }
         }
         else if (node->content)
-            TY_(BQ2Div)( doc, node->content );
+        {
+            TY_(push)(stack, next);  /* visit the children first, then this sibling */
+            node = node->content;
+            continue;
+        }
 
-        node = node->next;
+        node = next ? next : TY_(pop)(stack);  /* pop() yields NULL when nothing is pending */
     }
+    TY_(freeStack)(stack);  /* release the traversal stack's node array */
 }
 
@@ -2736,30 +2749,42 @@ void TY_(FixAnchors)(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId)
  */
 static void StyleToHead(TidyDocImpl* doc, Node *head, Node *node, Bool fix, int indent)
 {
-	Node *next;
-	while (node)
-	{
-		next = node->next;	/* get 'next' now , in case the node is moved */
-		/* dbg_show_node(doc, node, 0, indent); */
-		if (nodeIsSTYLE(node))
-		{
-			if (fix)
-			{
-				TY_(RemoveNode)(node); /* unhook style node from body */
-				TY_(InsertNodeAtEnd)(head, node);   /* add to end of head */
-				TY_(Report)(doc, node, head, MOVED_STYLE_TO_HEAD); /* report move */
-			}
-			else
-			{
-				TY_(Report)(doc, node, head, FOUND_STYLE_IN_BODY);
-			}
-		}
-		else if (node->content)
-		{
-			StyleToHead(doc, head, node->content, fix, indent + 1);
-		}
-		node = next;	/* process the 'next', if any */
-	}
+    Stack *stack = TY_(newStack)(doc, 16);  /* siblings to resume at after a subtree */
+    Node *next;
+
+    while (node)
+    {
+        next = node->next;  /* get 'next' now, in case the node is moved */
+        if (nodeIsSTYLE(node))
+        {
+            if (fix)
+            {
+                TY_(RemoveNode)(node); /* unhook style node from body */
+                TY_(InsertNodeAtEnd)(head, node);   /* add to end of head */
+                TY_(Report)(doc, node, head, MOVED_STYLE_TO_HEAD); /* report move */
+            }
+            else
+            {
+                TY_(Report)(doc, node, head, FOUND_STYLE_IN_BODY);
+            }
+        }
+        else if (node->content)
+        {
+            TY_(push)(stack, next);  /* visit the children first, then this sibling */
+            node = node->content;
+            indent++;
+            continue;
+        }
+
+        if (next)
+            node = next;
+        else
+        {
+            node = TY_(pop)(stack);  /* NULL when nothing is pending: ends the loop */
+            indent--;
+        }
+    }
+    TY_(freeStack)(stack);  /* release the traversal stack's node array */
 }
 
 
diff --git a/src/lexer.c b/src/lexer.c
index 0fe5dd6..fa8d6fb 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -877,15 +877,6 @@ static tmbchar LastChar( tmbstr str )
     return 0;
 }
 
-/*
-   node->type is one of these:
-
-    #define TextNode    1
-    #define StartTag    2
-    #define EndTag      3
-    #define StartEndTag 4
-*/
-
 Lexer* TY_(NewLexer)( TidyDocImpl* doc )
 {
     Lexer* lexer = (Lexer*) TidyDocAlloc( doc, sizeof(Lexer) );
@@ -1545,13 +1536,7 @@ void TY_(FreeNode)( TidyDocImpl* doc, Node *node )
         }
     }
 #endif
-    /* this is no good ;=((
-    if (node && doc && doc->lexer) {
-        if (node == doc->lexer->token) {
-            doc->lexer->token = NULL; // TY_(NewNode)( doc->lexer->allocator, doc->lexer );
-        }
-    }
-      ----------------- */
+
     while ( node )
     {
         Node* next = node->next;
@@ -4462,11 +4447,102 @@ static Node *ParseDocTypeDecl(TidyDocImpl* doc)
     return NULL;
 }
 
-/*
- * local variables:
- * mode: c
- * indent-tabs-mode: nil
- * c-basic-offset: 4
- * eval: (c-set-offset 'substatement-open 0)
- * end:
+
+/****************************************************************************//*
+ ** MARK: - Node Stack
+ ***************************************************************************/
+
+
+/**
+ * Create a new stack with a given starting capacity (0 is raised to 1). If
+ * memory allocation fails, the allocator is expected to panic the program.
  */
+Stack* TY_(newStack)(TidyDocImpl *doc, uint capacity)
+{
+    Stack *stack = (Stack *)TidyAlloc(doc->allocator, sizeof(Stack));
+    stack->top = -1;  /* -1 marks the empty stack */
+    stack->capacity = capacity ? capacity : 1;  /* growth doubles, so never start at 0 */
+    stack->firstNode = (Node **)TidyAlloc(doc->allocator, stack->capacity * sizeof(Node*));
+    stack->allocator = doc->allocator;  /* kept so grow/free use the same allocator */
+    return stack;
+}
+ 
+
+/**
+ *  Double the stack's capacity (a capacity of 0 becomes 1), copying the
+ *  stored nodes into the new array. Called by push() when the stack is
+ *  full. If allocation fails, the allocator is expected to panic.
+ */
+void TY_(growStack)(Stack *stack)
+{
+    uint new_capacity = stack->capacity ? stack->capacity * 2 : 1;
+
+    /* The allocation size is in bytes, so scale by the element size. */
+    Node **firstNode = (Node **)TidyAlloc(stack->allocator, new_capacity * sizeof(Node*));
+
+    memcpy( firstNode, stack->firstNode, sizeof(Node*) * (stack->top + 1) );
+    TidyFree(stack->allocator, stack->firstNode);
+    stack->firstNode = firstNode;
+    stack->capacity = new_capacity;
+}
+
+
+/**
+ * Returns true when `top` equals the last valid index, `capacity - 1`.
+ */
+Bool TY_(stackFull)(Stack *stack)
+{
+    return stack->top == stack->capacity - 1;
+}
+
+
+/**
+ * Returns true when the stack holds no nodes, i.e. `top` is -1.
+ */
+Bool TY_(stackEmpty)(Stack *stack)
+{
+    return stack->top == -1;
+}
+ 
+
+/**
+ * Push a node, growing the stack first if it is full. A NULL node is ignored.
+ */
+void TY_(push)(Stack *stack, Node *node)
+{
+    if (!node)
+        return;  /* nothing to store, so do not grow the stack for it */
+    if (TY_(stackFull)(stack))
+        TY_(growStack)(stack);
+    stack->firstNode[++stack->top] = node;
+}
+
+
+/**
+ * Remove and return the most recently pushed node, or NULL if the stack is empty.
+ */
+Node* TY_(pop)(Stack *stack)
+{
+    return TY_(stackEmpty)(stack) ? NULL : stack->firstNode[stack->top--];
+}
+
+
+/**
+ * Return the node on top of the stack without removing it, or NULL if empty.
+ */
+FUNC_UNUSED Node* TY_(peek)(Stack *stack)
+{
+    return TY_(stackEmpty)(stack) ? NULL : stack->firstNode[stack->top];
+}
+
+/**
+ *  Release the stack's node array and reset its fields; `stack` itself is not freed.
+ */
+void TY_(freeStack)(Stack *stack)
+{
+    TidyFree( stack->allocator, stack->firstNode );
+    stack->top = -1;
+    stack->capacity = 0;
+    stack->firstNode = NULL;
+    stack->allocator = NULL;
+}
diff --git a/src/lexer.h b/src/lexer.h
index 113a9f4..9d49898 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,33 +1,46 @@
 #ifndef __LEXER_H__
 #define __LEXER_H__
 
-/* lexer.h -- Lexer for html parser
-  
-   (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
-   See tidy.h for the copyright notice.
 
-  Given an input source, it returns a sequence of tokens.
-
-     GetToken(source) gets the next token
-     UngetToken(source) provides one level undo
-
-  The tags include an attribute list:
-
-    - linked list of attribute/value nodes
-    - each node has 2 NULL-terminated strings.
-    - entities are replaced in attribute values
-
-  white space is compacted if not in preformatted mode
-  If not in preformatted mode then leading white space
-  is discarded and subsequent white space sequences
-  compacted to single space characters.
-
-  If XmlTags is no then Tag names are folded to upper
-  case and attribute names to lower case.
-
- Not yet done:
-    -   Doctype subset and marked sections
-*/
+/**************************************************************************//**
+ * @file
+ * Lexer for HTML and XML Parsers.
+ *
+ *   Given an input source, it returns a sequence of tokens.
+ *
+ *      GetToken(source) gets the next token
+ *      UngetToken(source) provides one level undo
+ *
+ *   The tags include an attribute list:
+ *
+ *     - linked list of attribute/value nodes
+ *     - each node has 2 NULL-terminated strings.
+ *     - entities are replaced in attribute values
+ *
+ *   white space is compacted if not in preformatted mode
+ *   If not in preformatted mode then leading white space
+ *   is discarded and subsequent white space sequences
+ *   compacted to single space characters.
+ *
+ *   If XmlTags is no then Tag names are folded to upper
+ *   case and attribute names to lower case.
+ *
+ *  Not yet done:
+ *     - Doctype subset and marked sections
+ *
+ * @author  HTACG, et al (consult git log)
+ *
+ * @copyright
+ *     (c) 1998-2021 (W3C) MIT, ERCIM, Keio University, and HTACG.
+ *     See tidy.h for the copyright notice.
+ * @par
+ *     All Rights Reserved.
+ * @par
+ *     See `tidy.h` for the complete license.
+ *
+ * @date Additional updates: consult git log
+ *
+ ******************************************************************************/
 
 #ifdef __cplusplus
 extern "C" {
@@ -35,8 +48,23 @@ extern "C" {
 
 #include "forward.h"
 
-/* lexer character types
-*/
+/** @addtogroup internal_api */
+/** @{ */
+
+
+/***************************************************************************//**
+ ** @defgroup lexer_h HTML and XML Lexing
+ **
+ ** These functions and structures form the internal API for document
+ ** lexing.
+ **
+ ** @{
+ ******************************************************************************/
+
+
+/**
+ *  Lexer character types.
+ */
 #define digit       1u
 #define letter      2u
 #define namechar    4u
@@ -47,8 +75,9 @@ extern "C" {
 #define digithex    128u
 
 
-/* node->type is one of these values
-*/
+/**
+ *  node->type is one of these values
+ */
 typedef enum
 {
   RootNode,
@@ -68,9 +97,9 @@ typedef enum
 } NodeType;
 
 
-
-/* lexer GetToken states
-*/
+/**
+ *  Lexer GetToken() states.
+ */
 typedef enum
 {
   LEX_CONTENT,
@@ -88,7 +117,10 @@ typedef enum
   LEX_XMLDECL
 } LexerState;
 
-/* ParseDocTypeDecl state constants */
+
+/**
+ *  ParseDocTypeDecl state constants.
+ */
 typedef enum
 {
   DT_INTERMEDIATE,
@@ -98,67 +130,44 @@ typedef enum
   DT_INTSUBSET
 } ParseDocTypeDeclState;
 
-/* content model shortcut encoding
 
-   Descriptions are tentative.
-*/
+/**
+ *  Content model shortcut encoding.
+ *  Descriptions are tentative.
+ */
 #define CM_UNKNOWN      0
-/* Elements with no content. Map to HTML specification. */
-#define CM_EMPTY        (1 << 0)
-/* Elements that appear outside of "BODY". */
-#define CM_HTML         (1 << 1)
-/* Elements that can appear within HEAD. */
-#define CM_HEAD         (1 << 2)
-/* HTML "block" elements. */
-#define CM_BLOCK        (1 << 3)
-/* HTML "inline" elements. */
-#define CM_INLINE       (1 << 4)
-/* Elements that mark list item ("LI"). */
-#define CM_LIST         (1 << 5)
-/* Elements that mark definition list item ("DL", "DT"). */
-#define CM_DEFLIST      (1 << 6)
-/* Elements that can appear inside TABLE. */
-#define CM_TABLE        (1 << 7)
-/* Used for "THEAD", "TFOOT" or "TBODY". */
-#define CM_ROWGRP       (1 << 8)
-/* Used for "TD", "TH" */
-#define CM_ROW          (1 << 9)
-/* Elements whose content must be protected against white space movement.
-   Includes some elements that can found in forms. */
-#define CM_FIELD        (1 << 10)
-/* Used to avoid propagating inline emphasis inside some elements
-   such as OBJECT or APPLET. */
-#define CM_OBJECT       (1 << 11)
-/* Elements that allows "PARAM". */
-#define CM_PARAM        (1 << 12)
-/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
-#define CM_FRAMES       (1 << 13)
-/* Heading elements (h1, h2, ...). */
-#define CM_HEADING      (1 << 14)
-/* Elements with an optional end tag. */
-#define CM_OPT          (1 << 15)
-/* Elements that use "align" attribute for vertical position. */
-#define CM_IMG          (1 << 16)
-/* Elements with inline and block model. Used to avoid calling InlineDup. */
-#define CM_MIXED        (1 << 17)
-/* Elements whose content needs to be indented only if containing one 
-   CM_BLOCK element. */
-#define CM_NO_INDENT    (1 << 18)
-/* Elements that are obsolete (such as "dir", "menu"). */
-#define CM_OBSOLETE     (1 << 19)
-/* User defined elements. Used to determine how attributes without value
-   should be printed. */
-#define CM_NEW          (1 << 20)
-/* Elements that cannot be omitted. */
-#define CM_OMITST       (1 << 21)
+#define CM_EMPTY        (1 << 0)   /**< Elements with no content. Map to HTML specification. */
+#define CM_HTML         (1 << 1)   /**< Elements that appear outside of "BODY". */
+#define CM_HEAD         (1 << 2)   /**< Elements that can appear within HEAD. */
+#define CM_BLOCK        (1 << 3)   /**< HTML "block" elements. */
+#define CM_INLINE       (1 << 4)   /**< HTML "inline" elements. */
+#define CM_LIST         (1 << 5)   /**< Elements that mark list item ("LI"). */
+#define CM_DEFLIST      (1 << 6)   /**< Elements that mark definition list item ("DL", "DT"). */
+#define CM_TABLE        (1 << 7)   /**< Elements that can appear inside TABLE. */
+#define CM_ROWGRP       (1 << 8)   /**< Used for "THEAD", "TFOOT" or "TBODY". */
+#define CM_ROW          (1 << 9)   /**< Used for "TD", "TH" */
+#define CM_FIELD        (1 << 10)  /**< Elements whose content must be protected against white space movement. Includes some elements that can be found in forms. */
+#define CM_OBJECT       (1 << 11)  /**< Used to avoid propagating inline emphasis inside some elements such as OBJECT or APPLET. */
+#define CM_PARAM        (1 << 12)  /**< Elements that allow "PARAM". */
+#define CM_FRAMES       (1 << 13)  /**< "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
+#define CM_HEADING      (1 << 14)  /**< Heading elements (h1, h2, ...). */
+#define CM_OPT          (1 << 15)  /**< Elements with an optional end tag. */
+#define CM_IMG          (1 << 16)  /**< Elements that use "align" attribute for vertical position. */
+#define CM_MIXED        (1 << 17)  /**< Elements with inline and block model. Used to avoid calling InlineDup. */
+#define CM_NO_INDENT    (1 << 18)  /**< Elements whose content needs to be indented only if containing one CM_BLOCK element. */
+#define CM_OBSOLETE     (1 << 19)  /**< Elements that are obsolete (such as "dir", "menu"). */
+#define CM_NEW          (1 << 20)  /**< User defined elements. Used to determine how attributes without value should be printed. */
+#define CM_OMITST       (1 << 21)   /**< Elements that cannot be omitted. */
 
-/* If the document uses just HTML 2.0 tags and attributes described
-** it as HTML 2.0 Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
-** If there are proprietary tags and attributes then describe it as
-** HTML Proprietary. If it includes the xml-lang or xmlns attributes
-** but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the
-** flavors of Voyager (strict, loose or frameset).
-*/
+
+/**
+ *  If the document uses just HTML 2.0 tags and attributes, describe
+ *  it as HTML 2.0. Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
+ *  If there are proprietary tags and attributes then describe it as
+ *  HTML Proprietary. If it includes the xml-lang or xmlns attributes
+ *  but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the
+ *  flavors of Voyager (strict, loose or frameset).
+ */
 
 /* unknown */
 #define xxxx                   0u
@@ -220,8 +229,10 @@ typedef enum
 /* all proprietary types */
 #define VERS_PROPRIETARY   (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)
 
-/* Linked list of class names and styles
-*/
+
+/**
+ *  Linked list of class names and styles
+ */
 struct _Style;
 typedef struct _Style TagStyle;
 
@@ -234,8 +245,9 @@ struct _Style
 };
 
 
-/* Linked list of style properties
-*/
+/**
+ *  Linked list of style properties
+ */
 struct _StyleProp;
 typedef struct _StyleProp StyleProp;
 
@@ -247,11 +259,9 @@ struct _StyleProp
 };
 
 
-
-
-/* Attribute/Value linked list node
-*/
-
+/**
+ *  Attribute/Value linked list node
+ */
 struct _AttVal
 {
     AttVal*           next;
@@ -264,93 +274,89 @@ struct _AttVal
 };
 
 
-
-/*
-  Mosaic handles inlines via a separate stack from other elements
-  We duplicate this to recover from inline markup errors such as:
-
-     <i>italic text
-     <p>more italic text</b> normal text
-
-  which for compatibility with Mosaic is mapped to:
-
-     <i>italic text</i>
-     <p><i>more italic text</i> normal text
-
-  Note that any inline end tag pop's the effect of the current
-  inline start tag, so that </b> pop's <i> in the above example.
+/**
+ *  Mosaic handles inlines via a separate stack from other elements
+ *  We duplicate this to recover from inline markup errors such as:
+ *     ~~~
+ *     <i>italic text
+ *     <p>more italic text</b> normal text
+ *     ~~~
+ *  which for compatibility with Mosaic is mapped to:
+ *     ~~~
+ *     <i>italic text</i>
+ *     <p><i>more italic text</i> normal text
+ *     ~~~
+ *  Note that any inline end tag pops the effect of the current
+ *  inline start tag, so that `</b>` pops `<i>` in the above example.
 */
 struct _IStack
 {
     IStack*     next;
-    const Dict* tag;        /* tag's dictionary definition */
-    tmbstr      element;    /* name (NULL for text nodes) */
+    const Dict* tag;        /**< tag's dictionary definition */
+    tmbstr      element;    /**< name (NULL for text nodes) */
     AttVal*     attributes;
 };
 
 
-/* HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl,
-** etc. etc.
-*/
-
+/**
+ *  HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl, etc., etc.
+ */
 struct _Node
 {
-    Node*       parent;         /* tree structure */
+    Node*       parent;         /**< tree structure */
     Node*       prev;
     Node*       next;
     Node*       content;
     Node*       last;
 
     AttVal*     attributes;
-    const Dict* was;            /* old tag when it was changed */
-    const Dict* tag;            /* tag's dictionary definition */
+    const Dict* was;            /**< old tag when it was changed */
+    const Dict* tag;            /**< tag's dictionary definition */
 
-    tmbstr      element;        /* name (NULL for text nodes) */
+    tmbstr      element;        /**< name (NULL for text nodes) */
 
-    uint        start;          /* start of span onto text array */
-    uint        end;            /* end of span onto text array */
-    NodeType    type;           /* TextNode, StartTag, EndTag etc. */
+    uint        start;          /**< start of span onto text array */
+    uint        end;            /**< end of span onto text array */
+    NodeType    type;           /**< TextNode, StartTag, EndTag etc. */
 
-    uint        line;           /* current line of document */
-    uint        column;         /* current column of document */
+    uint        line;           /**< current line of document */
+    uint        column;         /**< current column of document */
 
-    Bool        closed;         /* true if closed by explicit end tag */
-    Bool        implicit;       /* true if inferred */
-    Bool        linebreak;      /* true if followed by a line break */
+    Bool        closed;         /**< true if closed by explicit end tag */
+    Bool        implicit;       /**< true if inferred */
+    Bool        linebreak;      /**< true if followed by a line break */
 };
 
 
-/*
-  The following are private to the lexer
-  Use NewLexer() to create a lexer, and
-  FreeLexer() to free it.
-*/
-
+/**
+ *  The following are private to the lexer.
+ *  Use `NewLexer()` to create a lexer, and `FreeLexer()` to free it.
+ */
 struct _Lexer
 {
-    uint lines;             /* lines seen */
-    uint columns;           /* at start of current token */
-    Bool waswhite;          /* used to collapse contiguous white space */
-    Bool pushed;            /* true after token has been pushed back */
-    Bool insertspace;       /* when space is moved after end tag */
-    Bool excludeBlocks;     /* Netscape compatibility */
-    Bool exiled;            /* true if moved out of table */
-    Bool isvoyager;         /* true if xmlns attribute on html element */
-    uint versions;          /* bit vector of HTML versions */
-    uint doctype;           /* version as given by doctype (if any) */
-    uint versionEmitted;    /* version of doctype emitted */
-    Bool bad_doctype;       /* e.g. if html or PUBLIC is missing */
-    uint txtstart;          /* start of current node */
-    uint txtend;            /* end of current node */
-    LexerState state;       /* state of lexer's finite state machine */
+    uint lines;                /**< lines seen */
+    uint columns;              /**< at start of current token */
+    Bool waswhite;             /**< used to collapse contiguous white space */
+    Bool pushed;               /**< true after token has been pushed back */
+    Bool insertspace;          /**< when space is moved after end tag */
+    Bool excludeBlocks;        /**< Netscape compatibility */
+    Bool exiled;               /**< true if moved out of table */
+    Bool isvoyager;            /**< true if xmlns attribute on html element ("Voyager" was the W3C codename for XHTML) */
+    uint versions;             /**< bit vector of HTML versions */
+    uint doctype;              /**< version as given by doctype (if any) */
+    uint versionEmitted;       /**< version of doctype emitted */
+    Bool bad_doctype;          /**< e.g. if html or PUBLIC is missing */
+    uint txtstart;             /**< start of current node */
+    uint txtend;               /**< end of current node */
+    LexerState state;          /**< state of lexer's finite state machine */
 
-    Node* token;            /* last token returned by GetToken() */
-    Node* itoken;           /* last duplicate inline returned by GetToken() */
-    Node* root;             /* remember root node of the document */
-    Node* parent;           /* remember parent node for CDATA elements */
-    
-    Bool seenEndBody;       /* true if a </body> tag has been encountered */
-    Bool seenEndHtml;       /* true if a </html> tag has been encountered */
+    Node* token;               /**< last token returned by GetToken() */
+    Node* itoken;              /**< last duplicate inline returned by GetToken() */
+    Node* root;                /**< remember root node of the document */
+    Node* parent;              /**< remember parent node for CDATA elements */
+
+    Bool seenEndBody;          /**< true if a `</body>` tag has been encountered */
+    Bool seenEndHtml;          /**< true if a `</html>` tag has been encountered */
 
     /*
       Lexer character buffer
@@ -361,33 +367,57 @@ struct _Lexer
 
       lexsize must be reset for each file.
     */
-    tmbstr lexbuf;          /* MB character buffer */
-    uint lexlength;         /* allocated */
-    uint lexsize;           /* used */
+    tmbstr lexbuf;             /**< MB character buffer */
+    uint lexlength;            /**< allocated */
+    uint lexsize;              /**< used */
 
     /* Inline stack for compatibility with Mosaic */
-    Node* inode;            /* for deferring text node */
-    IStack* insert;         /* for inferring inline tags */
+    Node* inode;               /**< for deferring text node */
+    IStack* insert;            /**< for inferring inline tags */
     IStack* istack;
-    uint istacklength;      /* allocated */
-    uint istacksize;        /* used */
-    uint istackbase;        /* start of frame */
+    uint istacklength;         /**< allocated */
+    uint istacksize;           /**< used */
+    uint istackbase;           /**< start of frame */
 
-    TagStyle *styles;          /* used for cleaning up presentation markup */
+    TagStyle *styles;          /**< used for cleaning up presentation markup */
 
-    TidyAllocator* allocator; /* allocator */
+    TidyAllocator* allocator;  /**< allocator */
 };
 
 
-/* Lexer Functions
-*/
+/**
+ *  modes for GetToken()
+ *
+ *  MixedContent   -- for elements which don't accept PCDATA
+ *  Preformatted   -- white space preserved as is
+ *  IgnoreMarkup   -- for CDATA elements such as script, style
+ */
+typedef enum
+{
+  IgnoreWhitespace,
+  MixedContent,
+  Preformatted,
+  IgnoreMarkup,
+  OtherNamespace,
+  CdataContent
+} GetTokenMode;
 
-/* choose what version to use for new doctype */
+
+/** @name Lexer Functions
+ *  @{
+ */
+
+
+/**
+ *  Choose what version to use for new doctype
+ */
 TY_PRIVATE int TY_(HTMLVersion)( TidyDocImpl* doc );
 
-/* everything is allowed in proprietary version of HTML */
-/* this is handled here rather than in the tag/attr dicts */
 
+/**
+ *  Everything is allowed in proprietary version of HTML.
+ *  This is handled here rather than in the tag/attr dicts
+ */
 TY_PRIVATE void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
 
 TY_PRIVATE Bool TY_(IsWhite)(uint c);
@@ -399,7 +429,6 @@ TY_PRIVATE Bool TY_(IsNamechar)(uint c);
 TY_PRIVATE Bool TY_(IsXMLLetter)(uint c);
 TY_PRIVATE Bool TY_(IsXMLNamechar)(uint c);
 
-/* Bool IsLower(uint c); */
 TY_PRIVATE Bool TY_(IsUpper)(uint c);
 TY_PRIVATE uint TY_(ToLower)(uint c);
 TY_PRIVATE uint TY_(ToUpper)(uint c);
@@ -407,60 +436,82 @@ TY_PRIVATE uint TY_(ToUpper)(uint c);
 TY_PRIVATE Lexer* TY_(NewLexer)( TidyDocImpl* doc );
 TY_PRIVATE void TY_(FreeLexer)( TidyDocImpl* doc );
 
-/* store character c as UTF-8 encoded byte stream */
+
+/**
+ *  Store character c as UTF-8 encoded byte stream
+ */
 TY_PRIVATE void TY_(AddCharToLexer)( Lexer *lexer, uint c );
 
-/*
-  Used for elements and text nodes
-  element name is NULL for text nodes
-  start and end are offsets into lexbuf
-  which contains the textual content of
-  all elements in the parse tree.
 
-  parent and content allow traversal
-  of the parse tree in any direction.
-  attributes are represented as a linked
-  list of AttVal nodes which hold the
-  strings for attribute/value pairs.
+/**
+ *  Used for elements and text nodes.
+ *   - Element name is NULL for text nodes.
+ *   - start and end are offsets into lexbuf,
+ *     which contains the textual content of
+ *     all elements in the parse tree.
+ *   - parent and content allow traversal
+ *     of the parse tree in any direction.
+ *   - attributes are represented as a linked
+ *     list of AttVal nodes which hold the
+ *     strings for attribute/value pairs.
 */
 TY_PRIVATE Node* TY_(NewNode)( TidyAllocator* allocator, Lexer* lexer );
 
 
-/* used to clone heading nodes when split by an <HR> */
+/**
+ *  Used to clone heading nodes when split by an `<HR>`
+ */
 TY_PRIVATE Node* TY_(CloneNode)( TidyDocImpl* doc, Node *element );
 
-/* free node's attributes */
+
+/**
+ *  Free node's attributes
+ */
 TY_PRIVATE void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node );
 
-/* doesn't repair attribute list linkage */
+
+/**
+ *  Doesn't repair attribute list linkage
+ */
 TY_PRIVATE void TY_(FreeAttribute)( TidyDocImpl* doc, AttVal *av );
 
-/* detach attribute from node */
+
+/**
+ * Detach attribute from node
+ */
 TY_PRIVATE void TY_(DetachAttribute)( Node *node, AttVal *attr );
 
-/* detach attribute from node then free it
-*/
+
+/**
+ *  Detach attribute from node then free it.
+ */
 TY_PRIVATE void TY_(RemoveAttribute)( TidyDocImpl* doc, Node *node, AttVal *attr );
 
-/*
-  Free document nodes by iterating through peers and recursing
-  through children. Set next to NULL before calling FreeNode()
-  to avoid freeing peer nodes. Doesn't patch up prev/next links.
+
+/**
+ *  Free document nodes by iterating through peers and recursing
+ *  through children. Set `next` to `NULL` before calling `FreeNode()`
+ *  to avoid freeing peer nodes. Doesn't patch up prev/next links.
  */
 TY_PRIVATE void TY_(FreeNode)( TidyDocImpl* doc, Node *node );
 
+
 TY_PRIVATE Node* TY_(TextToken)( Lexer *lexer );
 
-/* used for creating preformatted text from Word2000 */
+
+/**
+ *  Used for creating preformatted text from Word2000.
+ */
 TY_PRIVATE Node* TY_(NewLineNode)( Lexer *lexer );
 
-/* used for adding a &nbsp; for Word2000 */
+
+/**
+ *  Used for adding a &nbsp; for Word2000.
+ */
 TY_PRIVATE Node* TY_(NewLiteralTextNode)(Lexer *lexer, ctmbstr txt );
 
-TY_PRIVATE void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str );
-/* TY_PRIVATE void AddStringLiteralLen( Lexer* lexer, ctmbstr str, int len ); */
 
-/* find element */
+TY_PRIVATE void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str );
 TY_PRIVATE Node* TY_(FindDocType)( TidyDocImpl* doc );
 TY_PRIVATE Node* TY_(FindHTML)( TidyDocImpl* doc );
 TY_PRIVATE Node* TY_(FindHEAD)( TidyDocImpl* doc );
@@ -468,10 +519,16 @@ TY_PRIVATE Node* TY_(FindTITLE)(TidyDocImpl* doc);
 TY_PRIVATE Node* TY_(FindBody)( TidyDocImpl* doc );
 TY_PRIVATE Node* TY_(FindXmlDecl)(TidyDocImpl* doc);
 
-/* Returns containing block element, if any */
+
+/**
+ *  Returns containing block element, if any
+ */
 TY_PRIVATE Node* TY_(FindContainer)( Node* node );
 
-/* add meta element for Tidy */
+
+/**
+ *  Add meta element for Tidy.
+ */
 TY_PRIVATE Bool TY_(AddGenerator)( TidyDocImpl* doc );
 
 TY_PRIVATE uint TY_(ApparentVersion)( TidyDocImpl* doc );
@@ -485,118 +542,209 @@ TY_PRIVATE Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc );
 TY_PRIVATE Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc );
 
 
-/* fixup doctype if missing */
+/**
+ *  Fixup doctype if missing.
+ */
 TY_PRIVATE Bool TY_(FixDocType)( TidyDocImpl* doc );
 
-/* ensure XML document starts with <?xml version="1.0"?> */
-/* add encoding attribute if not using ASCII or UTF-8 output */
+
+/**
+ *  Ensure XML document starts with <?xml version="1.0"?>, and
+ *  add encoding attribute if not using ASCII or UTF-8 output.
+ */
 TY_PRIVATE Bool TY_(FixXmlDecl)( TidyDocImpl* doc );
 
+
 TY_PRIVATE Node* TY_(InferredTag)(TidyDocImpl* doc, TidyTagId id);
 
 TY_PRIVATE void TY_(UngetToken)( TidyDocImpl* doc );
 
-
-/*
-  modes for GetToken()
-
-  MixedContent   -- for elements which don't accept PCDATA
-  Preformatted   -- white space preserved as is
-  IgnoreMarkup   -- for CDATA elements such as script, style
-*/
-typedef enum
-{
-  IgnoreWhitespace,
-  MixedContent,
-  Preformatted,
-  IgnoreMarkup,
-  OtherNamespace,
-  CdataContent
-} GetTokenMode;
-
 TY_PRIVATE Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode );
 
 TY_PRIVATE void TY_(InitMap)(void);
 
 
-/* create a new attribute */
+/**
+ *  Create a new attribute.
+ */
 TY_PRIVATE AttVal* TY_(NewAttribute)( TidyDocImpl* doc );
 
-/* create a new attribute with given name and value */
+
+/**
+ *  Create a new attribute with given name and value.
+ */
 TY_PRIVATE AttVal* TY_(NewAttributeEx)( TidyDocImpl* doc, ctmbstr name, ctmbstr value,
                              int delim );
 
-/* insert attribute at the end of attribute list of a node */
+
+/**
+ *  Insert attribute at the end of attribute list of a node.
+ */
 TY_PRIVATE void TY_(InsertAttributeAtEnd)( Node *node, AttVal *av );
 
-/* insert attribute at the start of attribute list of a node */
+/**
+ *  Insert attribute at the start of attribute list of a node.
+ */
 TY_PRIVATE void TY_(InsertAttributeAtStart)( Node *node, AttVal *av );
 
-/*************************************
-  In-line Stack functions
-*************************************/
+
+/** @}
+ *  @name Inline Stack Functions
+ *  @{
+ */
 
 
-/* duplicate attributes */
+/**
+ *  Duplicate attributes.
+ */
 TY_PRIVATE AttVal* TY_(DupAttrs)( TidyDocImpl* doc, AttVal* attrs );
 
-/*
-  push a copy of an inline node onto stack
-  but don't push if implicit or OBJECT or APPLET
-  (implicit tags are ones generated from the istack)
 
-  One issue arises with pushing inlines when
-  the tag is already pushed. For instance:
-
-      <p><em>text
-      <p><em>more text
-
-  Shouldn't be mapped to
-
-      <p><em>text</em></p>
-      <p><em><em>more text</em></em>
-*/
+/**
+ *  Push a copy of an inline node onto stack, but don't push if
+ *  implicit or OBJECT or APPLET (implicit tags are ones generated
+ *  from the istack).
+ *
+ *  One issue arises with pushing inlines when the tag is already pushed.
+ *  For instance:
+ *    ~~~
+ *    <p><em>text
+ *    <p><em>more text
+ *    ~~~
+ *  Shouldn't be mapped to
+ *    ~~~
+ *    <p><em>text</em></p>
+ *    <p><em><em>more text</em></em>
+ *    ~~~
+ */
 TY_PRIVATE void TY_(PushInline)( TidyDocImpl* doc, Node* node );
 
-/* pop inline stack */
+
+/**
+ * Pop inline stack.
+ */
 TY_PRIVATE void TY_(PopInline)( TidyDocImpl* doc, Node* node );
 
+
 TY_PRIVATE Bool TY_(IsPushed)( TidyDocImpl* doc, Node* node );
 TY_PRIVATE Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node );
 
-/*
-  This has the effect of inserting "missing" inline
-  elements around the contents of blocklevel elements
-  such as P, TD, TH, DIV, PRE etc. This procedure is
-  called at the start of ParseBlock. when the inline
-  stack is not empty, as will be the case in:
 
-    <i><h1>italic heading</h1></i>
-
-  which is then treated as equivalent to
-
-    <h1><i>italic heading</i></h1>
-
-  This is implemented by setting the lexer into a mode
-  where it gets tokens from the inline stack rather than
-  from the input stream.
-*/
+/**
+ *  This has the effect of inserting "missing" inline elements around the
+ *  contents of blocklevel elements such as P, TD, TH, DIV, PRE etc. This
+ *  procedure is called at the start of `ParseBlock`, when the inline
+ *  stack is not empty, as will be the case in:
+ *    ~~~
+ *    <i><h1>italic heading</h1></i>
+ *    ~~~
+ *  which is then treated as equivalent to
+ *    ~~~
+ *    <h1><i>italic heading</i></h1>
+ *    ~~~
+ *  This is implemented by setting the lexer into a mode where it gets
+ *  tokens from the inline stack rather than from the input stream.
+ */
 TY_PRIVATE int TY_(InlineDup)( TidyDocImpl* doc, Node *node );
 
-/*
- defer duplicates when entering a table or other
- element where the inlines shouldn't be duplicated
-*/
+
+/**
+ *  Defer duplicates when entering a table or other
+ *  element where the inlines shouldn't be duplicated.
+ */
 TY_PRIVATE void TY_(DeferDup)( TidyDocImpl* doc );
+
+
 TY_PRIVATE Node* TY_(InsertedToken)( TidyDocImpl* doc );
 
-/* stack manipulation for inline elements */
+/**
+ *  Stack manipulation for inline elements
+ */
 TY_PRIVATE Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node );
+
+
 TY_PRIVATE Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element );
 
+
+/** @}
+ *  @name Generic stack of nodes.
+ *  @{
+ */
+
+
+/**
+ * This typedef represents a stack of addresses to nodes. Tidy uses these to
+ * try to limit recursion by pushing nodes to a stack when possible instead
+ * of recursing.
+ */
+typedef struct _Stack {
+    int top;                        /**< Current top position. */
+    unsigned capacity;              /**< Current capacity. Can be expanded. */
+    Node **firstNode;               /**< A pointer to the first pointer to a Node in an array of node addresses. */
+    TidyAllocator* allocator;       /**< Tidy's allocator, used at instantiation and expanding. */
+} Stack;
+ 
+
+/**
+ * Create a new stack with a given starting capacity. If memory allocation
+ * fails, then the allocator will panic the program automatically.
+ */
+TY_PRIVATE Stack* TY_(newStack)(TidyDocImpl *doc, uint capacity);
+ 
+
+/**
+ *  Increase the stack size. This will be called automatically when the
+ *  current stack is full. If memory allocation fails, then the allocator
+ *  will panic the program automatically.
+ */
+TY_PRIVATE void TY_(growStack)(Stack *stack);
+
+
+/**
+ * Stack is full when top is equal to the last index.
+ */
+TY_PRIVATE Bool TY_(stackFull)(Stack *stack);
+
+
+/**
+ * Stack is empty when top is equal to -1
+ */
+TY_PRIVATE Bool TY_(stackEmpty)(Stack *stack);
+ 
+
+/**
+ * Push an item to the stack.
+ */
+TY_PRIVATE void TY_(push)(Stack *stack, Node *node);
+
+
+/**
+ * Pop an item from the stack.
+ */
+TY_PRIVATE Node* TY_(pop)(Stack *stack);
+
+
+/**
+ * Peek at the stack.
+ */
+TY_PRIVATE Node* TY_(peek)(Stack *stack);
+
+/**
+ *  Frees the stack when done.
+ */
+TY_PRIVATE void TY_(freeStack)(Stack *stack);
+
+
+/** @}
+ */
+
+
 #ifdef __cplusplus
 }
 #endif
 
 
+/** @} end parser_h group */
+/** @} end internal_api group */
+
 #endif /* __LEXER_H__ */
diff --git a/src/parser.c b/src/parser.c
index eab7393..2a4fcb9 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -14,161 +14,37 @@
 #include "tmbstr.h"
 #include "sprtf.h"
 
-/*
-  Issue #72 - Need to know to avoid error-reporting - no warning only if --show-body-only yes
-  Issue #132 - likewise avoid warning if showing body only
+
+/****************************************************************************//*
+ ** MARK: - Forward Declarations
+ ***************************************************************************/
+
+
+static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode );
+
+
+/****************************************************************************//*
+ ** MARK: - Configuration Options
+ ***************************************************************************/
+
+
+/**
+ *  Issue #72  - Need to know to avoid error-reporting - no warning only if
+ *               --show-body-only yes.
+ *  Issue #132 - Likewise avoid warning if showing body only.
  */
 #define showingBodyOnly(doc) (cfgAutoBool(doc,TidyBodyOnly) == TidyYesState) ? yes : no
 
 
-Bool TY_(CheckNodeIntegrity)(Node *node)
-{
-#ifndef NO_NODE_INTEGRITY_CHECK
-    Node *child;
+/****************************************************************************//*
+ ** MARK: - Node Operations
+ ***************************************************************************/
 
-    if (node->prev)
-    {
-        if (node->prev->next != node)
-            return no;
-    }
 
-    if (node->next)
-    {
-        if (node->next == node || node->next->prev != node)
-            return no;
-    }
-
-    if (node->parent)
-    {
-        if (node->prev == NULL && node->parent->content != node)
-            return no;
-
-        if (node->next == NULL && node->parent->last != node)
-            return no;
-    }
-
-    for (child = node->content; child; child = child->next)
-        if ( child->parent != node || !TY_(CheckNodeIntegrity)(child) )
-            return no;
-
-#endif
-    return yes;
-}
-
-/*
- used to determine how attributes
- without values should be printed
- this was introduced to deal with
- user defined tags e.g. ColdFusion
-*/
-Bool TY_(IsNewNode)(Node *node)
-{
-    if (node && node->tag)
-    {
-        return (node->tag->model & CM_NEW);
-    }
-    return yes;
-}
-
-void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool unexpected)
-{
-    const Dict* tag = TY_(LookupTagDef)(tid);
-    Node* tmp = TY_(InferredTag)(doc, tag->id);
-
-    if (obsolete)
-        TY_(Report)(doc, node, tmp, OBSOLETE_ELEMENT);
-    else if (unexpected)
-        TY_(Report)(doc, node, tmp, REPLACING_UNEX_ELEMENT);
-    else
-        TY_(Report)(doc, node, tmp, REPLACING_ELEMENT);
-
-    TidyDocFree(doc, tmp->element);
-    TidyDocFree(doc, tmp);
-
-    node->was = node->tag;
-    node->tag = tag;
-    node->type = StartTag;
-    node->implicit = yes;
-    TidyDocFree(doc, node->element);
-    node->element = TY_(tmbstrdup)(doc->allocator, tag->name);
-}
-
-/* extract a node and its children from a markup tree */
-Node *TY_(RemoveNode)(Node *node)
-{
-    if (node->prev)
-        node->prev->next = node->next;
-
-    if (node->next)
-        node->next->prev = node->prev;
-
-    if (node->parent)
-    {
-        if (node->parent->content == node)
-            node->parent->content = node->next;
-
-        if (node->parent->last == node)
-            node->parent->last = node->prev;
-    }
-
-    node->parent = node->prev = node->next = NULL;
-    return node;
-}
-
-/* remove node from markup tree and discard it */
-Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element )
-{
-    Node *next = NULL;
-
-    if (element)
-    {
-        next = element->next;
-        TY_(RemoveNode)(element);
-        TY_(FreeNode)( doc, element);
-    }
-
-    return next;
-}
-
-/*
- insert "node" into markup tree as the first element
- of content of "element"
-*/
-void TY_(InsertNodeAtStart)(Node *element, Node *node)
-{
-    node->parent = element;
-
-    if (element->content == NULL)
-        element->last = node;
-    else
-        element->content->prev = node;
-
-    node->next = element->content;
-    node->prev = NULL;
-    element->content = node;
-}
-
-/*
- insert "node" into markup tree as the last element
- of content of "element"
-*/
-void TY_(InsertNodeAtEnd)(Node *element, Node *node)
-{
-    node->parent = element;
-    node->prev = element->last;
-
-    if (element->last != NULL)
-        element->last->next = node;
-    else
-        element->content = node;
-
-    element->last = node;
-}
-
-/*
- insert "node" into markup tree in place of "element"
- which is moved to become the child of the node
-*/
+/**
+ *  Insert "node" into markup tree in place of "element"
+ *  which is moved to become the child of the node
+ */
 static void InsertNodeAsParent(Node *element, Node *node)
 {
     node->content = element;
@@ -195,47 +71,172 @@ static void InsertNodeAsParent(Node *element, Node *node)
         node->next->prev = node;
 }
 
-/* insert "node" into markup tree before "element" */
-void TY_(InsertNodeBeforeElement)(Node *element, Node *node)
+
+/**
+ *  Inserts node into element at an appropriate location based
+ *  on the type of node being inserted.
+ */
+static Bool InsertMisc(Node *element, Node *node)
 {
-    Node *parent;
-
-    parent = element->parent;
-    node->parent = parent;
-    node->next = element;
-    node->prev = element->prev;
-    element->prev = node;
-
-    if (node->prev)
-        node->prev->next = node;
-
-    if (parent->content == element)
-        parent->content = node;
-}
-
-/* insert "node" into markup tree after "element" */
-void TY_(InsertNodeAfterElement)(Node *element, Node *node)
-{
-    Node *parent;
-
-    parent = element->parent;
-    node->parent = parent;
-
-    /* AQ - 13 Jan 2000 fix for parent == NULL */
-    if (parent != NULL && parent->last == element)
-        parent->last = node;
-    else
+    if (node->type == CommentTag ||
+        node->type == ProcInsTag ||
+        node->type == CDATATag ||
+        node->type == SectionTag ||
+        node->type == AspTag ||
+        node->type == JsteTag ||
+        node->type == PhpTag )
     {
-        node->next = element->next;
-        /* AQ - 13 Jan 2000 fix for node->next == NULL */
-        if (node->next != NULL)
-            node->next->prev = node;
+        TY_(InsertNodeAtEnd)(element, node);
+        return yes;
     }
 
-    element->next = node;
-    node->prev = element;
+    if ( node->type == XmlDecl )
+    {
+        Node* root = element;
+        while ( root && root->parent )
+            root = root->parent;
+        if ( root && !(root->content && root->content->type == XmlDecl))
+        {
+          TY_(InsertNodeAtStart)( root, node );
+          return yes;
+        }
+    }
+
+    /* Declared empty tags seem to be slipping through
+    ** the cracks.  This is an experiment to figure out
+    ** a decent place to pick them up.
+    */
+    if ( node->tag &&
+         TY_(nodeIsElement)(node) &&
+         TY_(nodeCMIsEmpty)(node) && TagId(node) == TidyTag_UNKNOWN &&
+         (node->tag->versions & VERS_PROPRIETARY) != 0 )
+    {
+        TY_(InsertNodeAtEnd)(element, node);
+        return yes;
+    }
+
+    return no;
 }
 
+
+/**
+ *  Move node to the head, where element is used as starting
+ *  point in hunt for head. Normally called during parsing.
+ */
+static void MoveToHead( TidyDocImpl* doc, Node *element, Node *node )
+{
+    Node *head;
+
+    TY_(RemoveNode)( node );  /* make sure that node is isolated */
+
+    if ( TY_(nodeIsElement)(node) )
+    {
+        TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN );
+
+        head = TY_(FindHEAD)(doc);
+        assert(head != NULL);
+
+        TY_(InsertNodeAtEnd)(head, node);
+
+        if ( node->tag->parser )
+            ParseTag( doc, node, IgnoreWhitespace );
+    }
+    else
+    {
+        TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
+        TY_(FreeNode)( doc, node );
+    }
+}
+
+
+/**
+ *  Moves given node to end of body element.
+ */
+static void MoveNodeToBody( TidyDocImpl* doc, Node* node )
+{
+    Node* body = TY_(FindBody)( doc );
+    if ( body )
+    {
+        TY_(RemoveNode)( node );
+        TY_(InsertNodeAtEnd)( body, node );
+    }
+}
+
+
+/**
+ *  Unexpected content in table row is moved to just before the table
+ *  in accordance with Netscape and IE. This code assumes that node hasn't
+ *  been inserted into the row.
+ */
+static void MoveBeforeTable( TidyDocImpl* ARG_UNUSED(doc), Node *row,
+                            Node *node )
+{
+    Node *table;
+
+    /* first find the table element */
+    for (table = row->parent; table; table = table->parent)
+    {
+        if ( nodeIsTABLE(table) )
+        {
+            TY_(InsertNodeBeforeElement)( table, node );
+            return;
+        }
+    }
+    /* No table element */
+    TY_(InsertNodeBeforeElement)( row->parent, node );
+}
+
+
+/**
+ *  Generalised search for duplicate elements.
+ *  Issue #166 - repeated <main> element.
+ */
+static Bool findNodeWithId( Node *node, TidyTagId tid )
+{
+    Node *content;
+    while (node)
+    {
+        if (TagIsId(node,tid))
+            return yes;
+        /*\
+         *   Issue #459 - Under certain circumstances, with many nodes, this use of
+         *   'for (content = node->content; content; content = content->content)'
+         *   would produce a **forever** circle, or at least a very extended loop...
+         *   It is sufficient to test the content, if it exists,
+         *   to quickly iterate all nodes. Now all nodes are tested only once.
+        \*/
+        content = node->content;
+        if (content)
+        {
+            if ( findNodeWithId(content,tid) )
+                return yes;
+        }
+        node = node->next;
+    }
+    return no;
+}
+
+
+/**
+ *  Perform a global search for an element.
+ *  Issue #166 - repeated <main> element
+ */
+static Bool findNodeById( TidyDocImpl* doc, TidyTagId tid )
+{
+    Node *node = (doc ? doc->root.content : NULL);
+    return findNodeWithId( node,tid );
+}
+
+
+/***************************************************************************//*
+ ** MARK: - Decision Making
+ ***************************************************************************/
+
+
+/**
+ *  Indicates whether or not element can be pruned based on content,
+ *  user settings, etc.
+ */
 static Bool CanPrune( TidyDocImpl* doc, Node *element )
 {
     if ( !cfgBool(doc, TidyDropEmptyElems) )
@@ -321,150 +322,27 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
     return yes;
 }
 
-/* return next element */
-Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element )
+
+/**
+ *  Indicates whether or not node is a descendant of a tag of the given tid.
+ */
+static Bool DescendantOf( Node *element, TidyTagId tid )
 {
-    if ( CanPrune(doc, element) )
+    Node *parent;
+    for ( parent = element->parent;
+         parent != NULL;
+         parent = parent->parent )
     {
-        if (element->type != TextNode)
-        {
-            doc->footnotes |= FN_TRIM_EMPTY_ELEMENT;
-            TY_(Report)(doc, element, NULL, TRIM_EMPTY_ELEMENT);
-        }
-        
-        return TY_(DiscardElement)(doc, element);
+        if ( TagIsId(parent, tid) )
+            return yes;
     }
-    return element->next;
+    return no;
 }
 
-Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node)
-{
-    Node* next;
-
-    while (node)
-    {
-        next = node->next;
-
-        if (node->content)
-            TY_(DropEmptyElements)(doc, node->content);
-
-        if (!TY_(nodeIsElement)(node) &&
-            !(TY_(nodeIsText)(node) && !(node->start < node->end)))
-        {
-            node = next;
-            continue;
-        }
-
-        next = TY_(TrimEmptyElement)(doc, node);
-        node = next;
-    }
-
-    return node;
-}
-
-/* 
-  errors in positioning of form start or end tags
-  generally require human intervention to fix
-  Issue #166 - repeated <main> element also uses this flag
-  to indicate duplicates, discarded
-*/
-static void BadForm( TidyDocImpl* doc )
-{
-    doc->badForm |= flg_BadForm;
-    /* doc->errors++; */
-}
-
-/*
-  This maps 
-       <em>hello </em><strong>world</strong>
-  to
-       <em>hello</em> <strong>world</strong>
-
-  If last child of element is a text node
-  then trim trailing white space character
-  moving it to after element's end tag.
-*/
-static void TrimTrailingSpace( TidyDocImpl* doc, Node *element, Node *last )
-{
-    Lexer* lexer = doc->lexer;
-    byte c;
-
-    if (TY_(nodeIsText)(last))
-    {
-        if (last->end > last->start)
-        {
-            c = (byte) lexer->lexbuf[ last->end - 1 ];
-
-            if ( c == ' ' )
-            {
-                last->end -= 1;
-                if ( (element->tag->model & CM_INLINE) &&
-                     !(element->tag->model & CM_FIELD) )
-                    lexer->insertspace = yes;
-            }
-        }
-    }
-}
-
-/* Only true for text nodes. */
-Bool TY_(IsBlank)(Lexer *lexer, Node *node)
-{
-    Bool isBlank = TY_(nodeIsText)(node);
-    if ( isBlank )
-        isBlank = ( node->end == node->start ||       /* Zero length */
-                    ( node->end == node->start+1      /* or one blank. */
-                      && lexer->lexbuf[node->start] == ' ' ) );
-    return isBlank;
-}
-
-/*
-  This maps 
-       <p>hello<em> world</em>
-  to
-       <p>hello <em>world</em>
-
-  Trims initial space, by moving it before the
-  start tag, or if this element is the first in
-  parent's content, then by discarding the space
-*/
-static void TrimInitialSpace( TidyDocImpl* doc, Node *element, Node *text )
-{
-    Lexer* lexer = doc->lexer;
-    Node *prev, *node;
-
-    if ( TY_(nodeIsText)(text) && 
-         lexer->lexbuf[text->start] == ' ' && 
-         text->start < text->end )
-    {
-        if ( (element->tag->model & CM_INLINE) &&
-             !(element->tag->model & CM_FIELD) )
-        {
-            prev = element->prev;
-
-            if (TY_(nodeIsText)(prev))
-            {
-                if (prev->end == 0 || lexer->lexbuf[prev->end - 1] != ' ')
-                    lexer->lexbuf[(prev->end)++] = ' ';
-
-                ++(element->start);
-            }
-            else /* create new node */
-            {
-                node = TY_(NewNode)(lexer->allocator, lexer);
-                node->start = (element->start)++;
-                node->end = element->start;
-                lexer->lexbuf[node->start] = ' ';
-                TY_(InsertNodeBeforeElement)(element ,node);
-                DEBUG_LOG(SPRTF("TrimInitialSpace: Created text node, inserted before <%s>\n",
-                    (element->element ? element->element : "unknown")));
-            }
-        }
-
-        /* discard the space in current node */
-        ++(text->start);
-    }
-}
 
+/**
+ *  Indicates whether or not node is a descendant of a pre tag.
+ */
 static Bool IsPreDescendant(Node* node)
 {
     Node *parent = node->parent;
@@ -480,6 +358,10 @@ static Bool IsPreDescendant(Node* node)
     return no;
 }
 
+
+/**
+ *  Indicates whether or not trailing whitespace should be cleaned.
+ */
 static Bool CleanTrailingWhitespace(TidyDocImpl* doc, Node* node)
 {
     Node* next;
@@ -531,6 +413,10 @@ static Bool CleanTrailingWhitespace(TidyDocImpl* doc, Node* node)
     return no;
 }
 
+
+/**
+ *  Indicates whether or not leading whitespace should be cleaned.
+ */
 static Bool CleanLeadingWhitespace(TidyDocImpl* ARG_UNUSED(doc), Node* node)
 {
     if (!TY_(nodeIsText)(node))
@@ -565,10 +451,149 @@ static Bool CleanLeadingWhitespace(TidyDocImpl* ARG_UNUSED(doc), Node* node)
     return no;
 }
 
+
+/**
+ *  Indicates whether or not the content of the given node is acceptable
+ *  content for pre elements
+ */
+static Bool PreContent( TidyDocImpl* ARG_UNUSED(doc), Node* node )
+{
+    /* p is coerced to br's, Text OK too */
+    if ( nodeIsP(node) || TY_(nodeIsText)(node) )
+        return yes;
+
+    if ( node->tag == NULL ||
+         nodeIsPARAM(node) ||
+         !TY_(nodeHasCM)(node, CM_INLINE|CM_NEW) )
+        return no;
+
+    return yes;
+}
+
+
+/**
+ *  Indicates whether or not the only content model for the given node
+ *  is CM_INLINE.
+ */
+static Bool nodeCMIsOnlyInline( Node* node )
+{
+    return TY_(nodeHasCM)( node, CM_INLINE ) && !TY_(nodeHasCM)( node, CM_BLOCK );
+}
+
+
+/***************************************************************************//*
+ ** MARK: - Information Accumulation
+ ***************************************************************************/
+
+
+/**
+ *  Errors in positioning of form start or end tags
+ *  generally require human intervention to fix.
+ *  Issue #166 - repeated <main> element also uses this flag
+ *  to indicate duplicates, discarded.
+ */
+static void BadForm( TidyDocImpl* doc )
+{
+    doc->badForm |= flg_BadForm;
+}
+
+
+/***************************************************************************//*
+ ** MARK: - Fixes and Touchup
+ ***************************************************************************/
+
+
+/**
+ *  This maps
+ *     <em>hello </em><strong>world</strong>
+ *  to
+ *     <em>hello</em> <strong>world</strong>
+ *
+ *  If last child of element is a text node
+ *  then trim trailing white space character
+ *  moving it to after element's end tag.
+ */
+static void TrimTrailingSpace( TidyDocImpl* doc, Node *element, Node *last )
+{
+    Lexer* lexer = doc->lexer;
+    byte c;
+
+    if (TY_(nodeIsText)(last))
+    {
+        if (last->end > last->start)
+        {
+            c = (byte) lexer->lexbuf[ last->end - 1 ];
+
+            if ( c == ' ' )
+            {
+                last->end -= 1;
+                if ( (element->tag->model & CM_INLINE) &&
+                     !(element->tag->model & CM_FIELD) )
+                    lexer->insertspace = yes;
+            }
+        }
+    }
+}
+
+
+/**
+ *  This maps
+ *     <p>hello<em> world</em>
+ *  to
+ *     <p>hello <em>world</em>
+ *
+ *  Trims initial space, by moving it before the
+ *  start tag, or if this element is the first in
+ *  parent's content, then by discarding the space
+ */
+static void TrimInitialSpace( TidyDocImpl* doc, Node *element, Node *text )
+{
+    Lexer* lexer = doc->lexer;
+    Node *prev, *node;
+
+    if ( TY_(nodeIsText)(text) && 
+         lexer->lexbuf[text->start] == ' ' && 
+         text->start < text->end )
+    {
+        if ( (element->tag->model & CM_INLINE) &&
+             !(element->tag->model & CM_FIELD) )
+        {
+            prev = element->prev;
+
+            if (TY_(nodeIsText)(prev))
+            {
+                if (prev->end == 0 || lexer->lexbuf[prev->end - 1] != ' ')
+                    lexer->lexbuf[(prev->end)++] = ' ';
+
+                ++(element->start);
+            }
+            else /* create new node */
+            {
+                node = TY_(NewNode)(lexer->allocator, lexer);
+                node->start = (element->start)++;
+                node->end = element->start;
+                lexer->lexbuf[node->start] = ' ';
+                TY_(InsertNodeBeforeElement)(element ,node);
+                DEBUG_LOG(SPRTF("TrimInitialSpace: Created text node, inserted before <%s>\n",
+                    (element->element ? element->element : "unknown")));
+            }
+        }
+
+        /* discard the space in current node */
+        ++(text->start);
+    }
+}
+
+
+/**
+ *  Cleans whitespace from text nodes, and drops such nodes if emptied
+ *  completely as a result.
+ */
 static void CleanSpaces(TidyDocImpl* doc, Node* node)
 {
-    Node* next;
-
+    Stack *stack = TY_(newStack)(doc, 16);
+    Node *next;
+    
     while (node)
     {
         next = node->next;
@@ -585,30 +610,34 @@ static void CleanSpaces(TidyDocImpl* doc, Node* node)
         {
             TY_(RemoveNode)(node);
             TY_(FreeNode)(doc, node);
-            node = next;
-
+            node = next ? next : TY_(pop)(stack);
             continue;
         }
 
         if (node->content)
-            CleanSpaces(doc, node->content);
+        {
+            TY_(push)(stack, next);
+            node = node->content;
+            continue;
+        }
 
-        node = next;
+        node = next ? next : TY_(pop)(stack);
     }
+    TY_(freeStack)(stack);
 }
 
-/* 
-  Move initial and trailing space out.
-  This routine maps:
 
-       hello<em> world</em>
-  to
-       hello <em>world</em>
-  and
-       <em>hello </em><strong>world</strong>
-  to
-       <em>hello</em> <strong>world</strong>
-*/
+/**
+ *  Move initial and trailing space out.
+ *  This routine maps:
+ *     hello<em> world</em>
+ *  to
+ *     hello <em>world</em>
+ *  and
+ *     <em>hello </em><strong>world</strong>
+ *  to
+ *     <em>hello</em> <strong>world</strong>
+ */
 static void TrimSpaces( TidyDocImpl* doc, Node *element)
 {
     Node* text = element->content;
@@ -625,97 +654,11 @@ static void TrimSpaces( TidyDocImpl* doc, Node *element)
         TrimTrailingSpace(doc, element, text);
 }
 
-static Bool DescendantOf( Node *element, TidyTagId tid )
-{
-    Node *parent;
-    for ( parent = element->parent;
-          parent != NULL;
-          parent = parent->parent )
-    {
-        if ( TagIsId(parent, tid) )
-            return yes;
-    }
-    return no;
-}
 
-static Bool InsertMisc(Node *element, Node *node)
-{
-    if (node->type == CommentTag ||
-        node->type == ProcInsTag ||
-        node->type == CDATATag ||
-        node->type == SectionTag ||
-        node->type == AspTag ||
-        node->type == JsteTag ||
-        node->type == PhpTag )
-    {
-        TY_(InsertNodeAtEnd)(element, node);
-        return yes;
-    }
-
-    if ( node->type == XmlDecl )
-    {
-        Node* root = element;
-        while ( root && root->parent )
-            root = root->parent;
-        if ( root && !(root->content && root->content->type == XmlDecl))
-        {
-          TY_(InsertNodeAtStart)( root, node );
-          return yes;
-        }
-    }
-
-    /* Declared empty tags seem to be slipping through
-    ** the cracks.  This is an experiment to figure out
-    ** a decent place to pick them up.
-    */
-    if ( node->tag &&
-         TY_(nodeIsElement)(node) &&
-         TY_(nodeCMIsEmpty)(node) && TagId(node) == TidyTag_UNKNOWN &&
-         (node->tag->versions & VERS_PROPRIETARY) != 0 )
-    {
-        TY_(InsertNodeAtEnd)(element, node);
-        return yes;
-    }
-
-    return no;
-}
-
-
-static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode )
-{
-    Lexer* lexer = doc->lexer;
-
-    if (node->tag == NULL) /* [i_a]2 prevent crash for active content (php, asp) docs */
-        return;
-
-    /*
-       Fix by GLP 2000-12-21.  Need to reset insertspace if this 
-       is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
-    */
-    if (node->tag->model & CM_EMPTY)
-    {
-        lexer->waswhite = no;
-        if (node->tag->parser == NULL)
-            return;
-    }
-    else if (!(node->tag->model & CM_INLINE))
-        lexer->insertspace = no;
-
-    if (node->tag->parser == NULL)
-        return;
-
-    if (node->type == StartEndTag)
-        return;
-
-    lexer->parent = node; /* [i_a]2 added this - not sure why - CHECKME: */
-
-    (*node->tag->parser)( doc, node, mode );
-}
-
-/*
- the doctype has been found after other tags,
- and needs moving to before the html element
-*/
+/**
+ *  The doctype has been found after other tags,
+ *  and needs moving to before the html element
+ */
 static void InsertDocType( TidyDocImpl* doc, Node *element, Node *doctype )
 {
     Node* existing = TY_(FindDocType)( doc );
@@ -733,51 +676,17 @@ static void InsertDocType( TidyDocImpl* doc, Node *element, Node *doctype )
     }
 }
 
-/*
- move node to the head, where element is used as starting
- point in hunt for head. normally called during parsing
-*/
-static void MoveToHead( TidyDocImpl* doc, Node *element, Node *node )
-{
-    Node *head;
 
-    TY_(RemoveNode)( node );  /* make sure that node is isolated */
-
-    if ( TY_(nodeIsElement)(node) )
-    {
-        TY_(Report)(doc, element, node, TAG_NOT_ALLOWED_IN );
-
-        head = TY_(FindHEAD)(doc);
-        assert(head != NULL);
-
-        TY_(InsertNodeAtEnd)(head, node);
-
-        if ( node->tag->parser )
-            ParseTag( doc, node, IgnoreWhitespace );
-    }
-    else
-    {
-        TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
-        TY_(FreeNode)( doc, node );
-    }
-}
-
-/* moves given node to end of body element */
-static void MoveNodeToBody( TidyDocImpl* doc, Node* node )
-{
-    Node* body = TY_(FindBody)( doc );
-    if ( body )
-    {
-        TY_(RemoveNode)( node );
-        TY_(InsertNodeAtEnd)( body, node );
-    }
-}
 
+/**
+ *  Adds style information as a class in the document or a property
+ *  of the node to prevent indentation of inferred UL tags.
+ */
 static void AddClassNoIndent( TidyDocImpl* doc, Node *node )
 {
     ctmbstr sprop =
-        "padding-left: 2ex; margin-left: 0ex"
-        "; margin-top: 0ex; margin-bottom: 0ex";
+    "padding-left: 2ex; margin-left: 0ex"
+    "; margin-top: 0ex; margin-bottom: 0ex";
     if ( !cfgBool(doc, TidyDecorateInferredUL) )
         return;
     if ( cfgBool(doc, TidyMakeClean) )
@@ -786,12 +695,378 @@ static void AddClassNoIndent( TidyDocImpl* doc, Node *node )
         TY_(AddStyleProperty)( doc, node, sprop );
 }
 
-/*
-   element is node created by the lexer
-   upon seeing the start tag, or by the
-   parser when the start tag is inferred
-*/
-void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
+
+/**
+ *  If a table row is empty then insert an empty cell. This practice is
+ *  consistent with browser behavior and avoids potential problems with
+ *  row spanning cells.
+ */
+static void FixEmptyRow(TidyDocImpl* doc, Node *row)
+{
+    Node *cell;
+
+    if (row->content == NULL)
+    {
+        cell = TY_(InferredTag)(doc, TidyTag_TD);
+        TY_(InsertNodeAtEnd)(row, cell);
+        TY_(Report)(doc, row, cell, MISSING_STARTTAG);
+    }
+}
+
+
+/***************************************************************************//*
+ ** MARK: - Parsers Support
+ ***************************************************************************/
+
+
+/**
+ *  Structure used by FindDescendant_cb.
+ */
+struct MatchingDescendantData
+{
+    Node *found_node;
+    Bool *passed_marker_node;
+
+    /* input: */
+    TidyTagId matching_tagId;
+    Node *node_to_find;
+    Node *marker_node;
+};
+
+
+/**
+ *  The main engine for FindMatchingDescendant.
+ */
+static NodeTraversalSignal FindDescendant_cb(TidyDocImpl* ARG_UNUSED(doc), Node* node, void *propagate)
+{
+    struct MatchingDescendantData *cb_data = (struct MatchingDescendantData *)propagate;
+
+    if (TagId(node) == cb_data->matching_tagId)
+    {
+        /* make sure we match up 'unknown' tags exactly! */
+        if (cb_data->matching_tagId != TidyTag_UNKNOWN ||
+            (node->element != NULL &&
+            cb_data->node_to_find != NULL &&
+            cb_data->node_to_find->element != NULL &&
+            0 == TY_(tmbstrcmp)(cb_data->node_to_find->element, node->element)))
+        {
+            cb_data->found_node = node;
+            return ExitTraversal;
+        }
+    }
+
+    if (cb_data->passed_marker_node && node == cb_data->marker_node)
+        *cb_data->passed_marker_node = yes;
+
+    return VisitParent;
+}
+
+
+/**
+ *  Search the parent chain (from `parent` upwards up to the root) for a node
+ *  matching the given 'node'.
+ *
+ *  When the search passes beyond the `marker_node` (which is assumed to sit
+ *  in the parent chain), this will be flagged by setting the boolean
+ *  referenced by `is_parent_of_marker` to `yes`.
+ *
+ *  'is_parent_of_marker' and 'marker_node' are optional parameters and may
+ *  be NULL.
+ */
+static Node *FindMatchingDescendant( Node *parent, Node *node, Node *marker_node, Bool *is_parent_of_marker )
+{
+    struct MatchingDescendantData cb_data = { 0 };
+    cb_data.matching_tagId = TagId(node);
+    cb_data.node_to_find = node;
+    cb_data.marker_node = marker_node;
+
+    assert(node);
+
+    if (is_parent_of_marker)
+        *is_parent_of_marker = no;
+
+    TY_(TraverseNodeTree)(NULL, parent, FindDescendant_cb, &cb_data);
+    return cb_data.found_node;
+}
+
+
+/**
+ *   Finds the last LI start tag among the direct children of the given
+ *   list, storing it in the out parameter `lastli` (NULL if there is none).
+ *   Returns yes if such a list item was found, otherwise no.
+ */
+static Bool FindLastLI( Node *list, Node **lastli )
+{
+    Node *node;
+
+    *lastli = NULL;
+    for ( node = list->content; node ; node = node->next )
+        if ( nodeIsLI(node) && node->type == StartTag )
+            *lastli=node;
+    return *lastli ? yes:no;
+}
+
+
+/***************************************************************************//*
+ ** MARK: - Parser Stack
+ ***************************************************************************/
+
+
+/**
+ *  Allocates and initializes the parser's stack.
+ */
+void TY_(InitParserStack)( TidyDocImpl* doc )
+{
+    uint default_size = 16;
+    TidyParserMemory *content = (TidyParserMemory *) TidyAlloc( doc->allocator, sizeof(TidyParserMemory) * default_size );
+
+    doc->stack.content = content;
+    doc->stack.size = default_size;
+    doc->stack.top = -1;
+    doc->stack.allocator = doc->allocator;
+}
+
+
+/**
+ *  Frees the parser's stack when done.
+ */
+void TY_(FreeParserStack)( TidyDocImpl* doc )
+{
+    TidyFree( doc->stack.allocator, doc->stack.content );
+
+    doc->stack.content = NULL;
+    doc->stack.size = 0;
+    doc->stack.top = -1;
+}
+
+
+/**
+ *  Increase the stack size.
+ *  TODO: don't overflow max_uint. Need a message when we can no longer increase the size beyond 429 million depth.
+ */
+static void growParserStack( TidyDocImpl* doc )
+{
+    TidyParserMemory *content;
+    content = (TidyParserMemory *) TidyAlloc( doc->stack.allocator, sizeof(TidyParserMemory) * doc->stack.size * 2 );
+
+    memcpy( content, doc->stack.content, sizeof(TidyParserMemory) * (doc->stack.top + 1) );
+    TidyFree(doc->stack.allocator, doc->stack.content);
+
+    doc->stack.content = content;
+    doc->stack.size = doc->stack.size * 2;
+}
+
+
+/**
+ *  Indicates whether or not the stack is empty.
+ */
+static Bool isEmptyParserStack( TidyDocImpl* doc )
+{
+    return doc->stack.top < 0;
+}
+
+
+/**
+ * Push the parser memory to the stack.
+ */
+static void pushMemory( TidyDocImpl* doc, TidyParserMemory data )
+{
+    if ( doc->stack.top == doc->stack.size - 1 )
+        growParserStack( doc );
+
+    doc->stack.top++;
+    doc->stack.content[doc->stack.top] = data;
+}
+
+
+/**
+ *  Peek at the parser memory.
+ */
+static FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc )
+{
+    return doc->stack.content[doc->stack.top];
+}
+
+
+/**
+ *  Peek at the parser memory "mode" field. This is just a convenience
+ *  to avoid having to create a new struct instance in the caller.
+ */
+static GetTokenMode peekMemoryMode( TidyDocImpl* doc )
+{
+    return doc->stack.content[doc->stack.top].mode;
+}
+
+
+/**
+ *  Peek at the parser memory "identity" field. This is just a convenience
+ *  to avoid having to create a new struct instance in the caller.
+ */
+static Parser* peekMemoryIdentity( TidyDocImpl* doc )
+{
+    return doc->stack.content[doc->stack.top].identity;
+}
+
+
+/**
+ *  Pop out a parser memory.
+ */
+static TidyParserMemory popMemory( TidyDocImpl* doc )
+{
+    if ( !isEmptyParserStack( doc ) )
+    {
+        TidyParserMemory data = doc->stack.content[doc->stack.top];
+        doc->stack.top = doc->stack.top - 1;
+        return data;
+    }
+    TidyParserMemory blank = { NULL };
+    return blank;
+}
+
+
+/***************************************************************************//*
+ ** MARK: - Parser Search and Instantiation
+ ***************************************************************************/
+
+
+/**
+ *  Retrieves the correct parser for the given node, accounting for various
+ *  conditions, and readies the lexer for parsing that node.
+ */
+static Parser* GetParserForNode( TidyDocImpl* doc, Node *node )
+{
+    Lexer* lexer = doc->lexer;
+
+    /* [i_a]2 prevent crash for active content (php, asp) docs */
+    if (node->tag == NULL) 
+        return NULL;
+
+    /*
+       Fix by GLP 2000-12-21.  Need to reset insertspace if this is both
+       a non-inline and empty tag (base, link, meta, isindex, hr, area).
+    */
+    if (node->tag->model & CM_EMPTY)
+    {
+        lexer->waswhite = no;
+        if (node->tag->parser == NULL)
+            return NULL;
+    }
+    else if (!(node->tag->model & CM_INLINE))
+        lexer->insertspace = no;
+
+    if (node->tag->parser == NULL)
+        return NULL;
+
+    if (node->type == StartEndTag)
+        return NULL;
+
+    /* [i_a]2 added this - not sure why - CHECKME: */
+    lexer->parent = node;
+
+    return (node->tag->parser);
+}
+
+
+/**
+ *  Instantiates the correct parser for the given node. This is currently
+ *  maintained ONLY until the legacy parsers have been ported, as this
+ *  introduces recursion when used.
+ */
+static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode )
+{
+    Parser* parser = GetParserForNode( doc, node );
+
+    if ( parser )
+        (*parser)( doc, node, mode, no );
+}
+
+
+/**
+ *  The main parser body will populate the document's document root starting
+ *  with the provided node, which generally should be the HTML node after the
+ *  pre-HTML stuff is handled at a higher level.
+ *
+ *  This parser works cooperatively with compliant parsers to pass state
+ *  information back and forth in the TidyDocImpl's `stack`, which resides on
+ *  the heap and prevents recursion and stack exhaustion, and also works well
+ *  with the old-style parsers that do recurse.
+ *
+ *  (The goal is to update the old-style parsers slowly and deliberately
+ *  without causing regressions, in a series of smaller commits and updates.)
+ */
+void ParseHTMLWithNode( TidyDocImpl* doc, Node* node )
+{
+    GetTokenMode mode = IgnoreWhitespace;
+    Parser* parser = NULL;
+
+    /*
+     This main loop is only extinguished when all of the parser tokens are
+     consumed. Note that most of the parsers consume tokens as well, and
+     so what we're really doing here is managing parsers and preventing
+     recursion with cooperating parsers.
+     */
+    while ( node )
+    {
+        if ( (parser = GetParserForNode( doc, node )) )
+        {
+            if ( (node = parser( doc, node, mode, no )) )
+            {
+                /*
+                 When a parser returns a node, it means that we have
+                 to continue the loop rather than moving on, because it
+                 indicates that the parser encountered a token it does not
+                 handle. It also tells us the correct GetTokenMode to use
+                 for it via the struct that it pushed:
+                 */
+                mode = peekMemoryMode( doc );
+                continue;
+            }
+        }
+
+        /*
+         If we've come this far, the parser has bottomed out, and won't be
+         going any deeper. Now we run back up the stack to close all of the
+         open elements and handle any parser post-processing that was needed.
+         Of course, other nodes might cause us to deepen the stack again, too.
+         */
+        if ( !isEmptyParserStack( doc ) )
+        {
+            if ( (parser = peekMemoryIdentity( doc )) )
+            {
+                if ( (node = parser( doc, NULL, 0, yes )) )
+                {
+                    /* Another assignment from the parser. */
+                    mode = peekMemoryMode( doc );
+                    continue;
+                }
+            } else {
+                /*
+                 There's no identity in the stack (it was used to pass back
+                 a GetToken mode, and nothing else), so discard it.
+                 */
+                popMemory( doc );
+            }
+        }
+
+        /*
+         Assuming we've gotten this far, there's no more work to do and
+         so we can draw a nice, fresh token from the lexer.
+         */
+       node = TY_(GetToken)( doc, mode );
+    }
+}
+
+
+/***************************************************************************//*
+ ** MARK: - Old Parsers
+ ***************************************************************************/
+
+
+/** MARK: TY_(oldParseBlock)
+ *  `element` is a node created by the lexer upon seeing the start tag, or
+ *  by the parser when the start tag is inferred
+ */
+void* TY_(oldParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
 {
 #if defined(ENABLE_DEBUG_LOG)
     static int in_parse_block = 0;
@@ -813,10 +1088,10 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
         in_parse_block--;
         SPRTF("Exit ParseBlockL 1 %d...\n",in_parse_block);
 #endif
-        return;
+        return NULL;
     }
 
-    if ( nodeIsFORM(element) && 
+    if ( nodeIsFORM(element) &&
          DescendantOf(element, TidyTag_FORM) )
         TY_(Report)(doc, element, NULL, ILLEGAL_NESTING );
 
@@ -851,8 +1126,8 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
     else if (mode == IgnoreWhitespace)
     {
         /* Issue #212 - Further fix in case ParseBlock() is called with 'IgnoreWhitespace'
-           when such a leading space may need to be inserted before this element to 
-           preserve the browser view */
+           when such a leading space may need to be inserted before this element to
+           preserve the browser view */
         mode = MixedContent;
     }
 
@@ -878,7 +1153,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
             in_parse_block--;
             SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
 #endif
-            return;
+            return NULL;
         }
 
         if ( nodeIsHTML(node) || nodeIsHEAD(node) || nodeIsBODY(node) )
@@ -913,7 +1188,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
             }
             else if (DescendantOf( element, node->tag->id ))
             {
-                /* 
+                /*
                   if this is the end tag for an ancestor element
                   then infer end tag for this element
                 */
@@ -932,7 +1207,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                     in_parse_block--;
                     SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
 #endif
-                    return;
+                    return NULL;
                 }
             }
         }
@@ -1096,7 +1371,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                     in_parse_block--;
                     SPRTF("Exit ParseBlock 3 %d...\n",in_parse_block);
 #endif
-                    return;
+                    return NULL;
                 }
             }
             else if ( TY_(nodeHasCM)(node, CM_BLOCK) )
@@ -1116,7 +1391,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                     in_parse_block--;
                     SPRTF("Exit ParseBlock 4 %d...\n",in_parse_block);
 #endif
-                    return;
+                    return NULL;
                 }
             }
             else /* things like list items */
@@ -1157,7 +1432,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
 
                 if ( !TY_(nodeHasCM)(element, CM_OPT) && !element->implicit )
                     TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE );
-                
+
                 /* #521, warn on missing optional end-tags if not omitting them. */
                 if ( cfgBool( doc, TidyOmitOptionalTags ) == no && TY_(nodeHasCM)(element, CM_OPT) )
                     TY_(Report)(doc, element, node, MISSING_ENDTAG_OPTIONAL );
@@ -1175,7 +1450,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                         in_parse_block--;
                         SPRTF("Exit ParseBlock 5 %d...\n",in_parse_block);
 #endif
-                        return;
+                        return NULL;
                     }
 
                     node = TY_(InferredTag)(doc, TidyTag_UL);
@@ -1190,7 +1465,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                         in_parse_block--;
                         SPRTF("Exit ParseBlock 6 %d...\n",in_parse_block);
 #endif
-                        return;
+                        return NULL;
                     }
 
                     node = TY_(InferredTag)(doc, TidyTag_DL);
@@ -1198,14 +1473,14 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                 else if ( TY_(nodeHasCM)(node, CM_TABLE) || TY_(nodeHasCM)(node, CM_ROW) )
                 {
                     /* http://tidy.sf.net/issue/1316307 */
-                    /* In exiled mode, return so table processing can 
+                    /* In exiled mode, return so table processing can
                        continue. */
                     if (lexer->exiled) {
 #if defined(ENABLE_DEBUG_LOG)
                         in_parse_block--;
                         SPRTF("Exit ParseBlock 7 %d...\n",in_parse_block);
 #endif
-                        return;
+                        return NULL;
                     }
                     node = TY_(InferredTag)(doc, TidyTag_TABLE);
                 }
@@ -1220,7 +1495,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                     in_parse_block--;
                     SPRTF("Exit ParseBlock 8 %d...\n",in_parse_block);
 #endif
-                    return;
+                    return NULL;
 
                 }
                 else
@@ -1230,7 +1505,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                     in_parse_block--;
                     SPRTF("Exit ParseBlock 9 %d...\n",in_parse_block);
 #endif
-                    return;
+                    return NULL;
                 }
             }
         }
@@ -1242,7 +1517,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
          *  href: http://www.w3.org/TR/html-markup/a.html
          *  The interactive element a must not appear as a descendant of the a element.
         \*/
-        if ( nodeIsA(node) && !node->implicit && 
+        if ( nodeIsA(node) && !node->implicit &&
              (nodeIsA(element) || DescendantOf(element, TidyTag_A)) )
         {
             if (node->type != EndTag && node->attributes == NULL
@@ -1274,7 +1549,7 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
             in_parse_block--;
             SPRTF("Exit ParseBlock 9b %d...\n",in_parse_block);
 #endif
-            return;
+            return NULL;
         }
 
         /* parse known element */
@@ -1306,11 +1581,11 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
                 TrimSpaces( doc, element );
 
             TY_(InsertNodeAtEnd)(element, node);
-            
+
             if (node->implicit)
                 TY_(Report)(doc, element, node, INSERTING_TAG );
 
-            /* Issue #212 - WHY is this hard coded to 'IgnoreWhitespace' while an 
+            /* Issue #212 - WHY is this hard coded to 'IgnoreWhitespace' while an
                effort has been made above to set a 'MixedContent' mode in some cases?
                WHY IS THE 'mode' VARIABLE NOT USED HERE???? */
             ParseTag( doc, node, IgnoreWhitespace /*MixedContent*/ );
@@ -1338,205 +1613,19 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
     }
 
     TrimSpaces( doc, element );
+
 #if defined(ENABLE_DEBUG_LOG)
     in_parse_block--;
     SPRTF("Exit ParseBlock 10 %d...\n",in_parse_block);
 #endif
-}
-
-/* [i_a] svg / math */
-
-struct MatchingDescendantData
-{
-    Node *found_node;
-    Bool *passed_marker_node;
-
-    /* input: */
-    TidyTagId matching_tagId;
-    Node *node_to_find;
-    Node *marker_node;
-};
-
-static NodeTraversalSignal FindDescendant_cb(TidyDocImpl* ARG_UNUSED(doc), Node* node, void *propagate)
-{
-    struct MatchingDescendantData *cb_data = (struct MatchingDescendantData *)propagate;
-
-    if (TagId(node) == cb_data->matching_tagId)
-    {
-        /* make sure we match up 'unknown' tags exactly! */
-        if (cb_data->matching_tagId != TidyTag_UNKNOWN ||
-            (node->element != NULL &&
-            cb_data->node_to_find != NULL &&
-            cb_data->node_to_find->element != NULL &&
-            0 == TY_(tmbstrcmp)(cb_data->node_to_find->element, node->element)))
-        {
-            cb_data->found_node = node;
-            return ExitTraversal;
-        }
-    }
-
-    if (cb_data->passed_marker_node && node == cb_data->marker_node)
-        *cb_data->passed_marker_node = yes;
-
-    return VisitParent;
-}
-
-/*
-Search the parent chain (from 'parent' upwards up to the root) for a node matching the
-given 'node'.
-
-When the search passes beyond the 'marker_node' (which is assumed to sit in the
-parent chain), this will be flagged by setting the boolean referenced by
-'is_parent_of_marker' to yes.
-
-'is_parent_of_marker' and 'marker_node' are optional parameters and may be NULL.
-*/
-static Node *FindMatchingDescendant( Node *parent, Node *node, Node *marker_node, Bool *is_parent_of_marker )
-{
-    struct MatchingDescendantData cb_data = { 0 };
-    cb_data.matching_tagId = TagId(node);
-    cb_data.node_to_find = node;
-    cb_data.marker_node = marker_node;
-
-    assert(node);
-
-    if (is_parent_of_marker)
-        *is_parent_of_marker = no;
-
-    TY_(TraverseNodeTree)(NULL, parent, FindDescendant_cb, &cb_data);
-    return cb_data.found_node;
-}
-
-/*
-   Act as a generic XML (sub)tree parser: collect each node and add it to the DOM, without any further validation.
-   TODO : add schema- or other-hierarchy-definition-based validation of the subtree here...
-*/
-void TY_(ParseNamespace)(TidyDocImpl* doc, Node *basenode, GetTokenMode mode)
-{
-    Lexer* lexer = doc->lexer;
-    Node *node;
-    Node *parent = basenode;
-    uint istackbase;
-    AttVal* av; /* #130 MathML attr and entity fix! */
-
-    /* a la <table>: defer popping elements off the inline stack */
-    TY_(DeferDup)( doc );
-    istackbase = lexer->istackbase;
-    lexer->istackbase = lexer->istacksize;
-
-    mode = OtherNamespace; /* Preformatted; IgnoreWhitespace; */
-
-    while ((node = TY_(GetToken)(doc, mode)) != NULL)
-    {
-        /*
-        fix check to skip action in InsertMisc for regular/empty
-        nodes, which we don't want here...
-
-        The way we do it here is by checking and processing everything
-        and only what remains goes into InsertMisc()
-        */
-
-        /* is this a close tag? And does it match the current parent node? */
-        if (node->type == EndTag)
-        {
-            /*
-            to prevent end tags flowing from one 'alternate namespace' we
-            check this in two phases: first we check if the tag is a
-            descendant of the current node, and when it is, we check whether
-            it is the end tag for a node /within/ or /outside/ the basenode.
-            */
-            Bool outside;
-            Node *mp = FindMatchingDescendant(parent, node, basenode, &outside);
-
-            if (mp != NULL)
-            {
-                /*
-                when mp != parent as we might expect,
-                infer end tags until we 'hit' the matched
-                parent or the basenode
-                */
-                Node *n;
-
-                for (n = parent;
-                     n != NULL && n != basenode->parent && n != mp;
-                     n = n->parent)
-                {
-                    /* n->implicit = yes; */
-                    n->closed = yes;
-                    TY_(Report)(doc, n->parent, n, MISSING_ENDTAG_BEFORE);
-                }
-
-                /* Issue #369 - Since 'assert' is DEBUG only, and there are
-                   simple cases where these can be fired, removing them
-                   pending feedback from the original author!
-                   assert(outside == no ? n == mp : 1);
-                   assert(outside == yes ? n == basenode->parent : 1);
-                   =================================================== */
-
-                if (outside == no)
-                {
-                    /* EndTag for a node within the basenode subtree. Roll on... */
-                    n->closed = yes;
-                    TY_(FreeNode)(doc, node);
-
-                    node = n;
-                    parent = node->parent;
-                }
-                else
-                {
-                    /* EndTag for a node outside the basenode subtree: let the caller handle that. */
-                    TY_(UngetToken)( doc );
-                    node = basenode;
-                    parent = node->parent;
-                }
-
-                /* when we've arrived at the end-node for the base node, it's quitting time */
-                if (node == basenode)
-                {
-                    lexer->istackbase = istackbase;
-                    assert(basenode->closed == yes);
-                    return;
-                }
-            }
-            else
-            {
-                /* unmatched close tag: report an error and discard */
-                /* TY_(Report)(doc, parent, node, NON_MATCHING_ENDTAG); Issue #308 - Seems wrong warning! */
-                TY_(Report)(doc, parent, node, DISCARDING_UNEXPECTED);
-                assert(parent);
-                /* assert(parent->tag != node->tag); Issue #308 - Seems would always be true! */
-                TY_(FreeNode)( doc, node); /* Issue #308 - Discard unexpected end tag memory */
-            }
-        }
-        else if (node->type == StartTag)
-        {
-            /* #130 MathML attr and entity fix! 
-               care if it has attributes, and 'accidently' any of those attributes match known */
-            for ( av = node->attributes; av; av = av->next )
-            {
-                av->dict = 0; /* does something need to be freed? */
-            }
-            /* add another child to the current parent */
-            TY_(InsertNodeAtEnd)(parent, node);
-            parent = node;
-        }
-        else
-        {
-            /* #130 MathML attr and entity fix! 
-               care if it has attributes, and 'accidently' any of those attributes match known */
-            for ( av = node->attributes; av; av = av->next )
-            {
-                av->dict = 0; /* does something need to be freed? */
-            }
-            TY_(InsertNodeAtEnd)(parent, node);
-        }
-    }
-
-    TY_(Report)(doc, basenode->parent, basenode, MISSING_ENDTAG_FOR);
+    return NULL;
 }
 
 
-TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
+/** MARK: TY_(oldParseInline)
+ *  Parse inline element nodes.
+ */
+void* TY_(oldParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
 {
 #if defined(ENABLE_DEBUG_LOG)
     static int in_parse_inline = 0;
@@ -1553,7 +1642,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
         in_parse_inline--;
         SPRTF("Exit ParseInline 1 %d...\n",in_parse_inline);
 #endif
-        return;
+        return NULL;
     }
 
     /*
@@ -1564,7 +1653,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
      This test is carried out in PushInline and PopInline, see istack.c
 
      InlineDup(...) is not called for elements with a CM_MIXED (inline and
-     block) content model, e.g. <del> or <ins>, otherwise constructs like 
+     block) content model, e.g. <del> or <ins>, otherwise constructs like
 
        <p>111<a name='foo'>222<del>333</del>444</a>555</p>
        <p>111<span>222<del>333</del>444</span>555</p>
@@ -1605,7 +1694,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
              then move the font element inside the anchor since
              otherwise it won't alter the anchor text color
             */
-            if ( nodeIsFONT(element) && 
+            if ( nodeIsFONT(element) &&
                  element->content && element->content == element->last )
             {
                 Node *child = element->content;
@@ -1635,7 +1724,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
             in_parse_inline--;
             SPRTF("Exit ParseInline 2 %d...\n",in_parse_inline);
 #endif
-            return;
+            return NULL;
         }
 
         /* <u>...<u>  map 2nd <u> to </u> if 1st is explicit */
@@ -1674,14 +1763,14 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
             if (node->attributes == NULL || element->attributes == NULL)
                 TY_(Report)(doc, element, node, NESTED_EMPHASIS);
         }
-        else if ( TY_(IsPushed)(doc, node) && node->type == StartTag && 
+        else if ( TY_(IsPushed)(doc, node) && node->type == StartTag &&
                   nodeIsQ(node) )
         {
             /*\
              * Issue #215 - such nested quotes are NOT a problem if HTML5, so
              * only issue this warning if NOT HTML5 mode.
             \*/
-            if (TY_(HTMLVersion)(doc) != HT50) 
+            if (TY_(HTMLVersion)(doc) != HT50)
             {
                 TY_(Report)(doc, element, node, NESTED_QUOTATION);
             }
@@ -1726,14 +1815,14 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
             in_parse_inline--;
             SPRTF("Exit ParseInline 3 %d...\n",in_parse_inline);
 #endif
-            return;
+            return NULL;
         }
 
         /* within <dt> or <pre> map <p> to <br> */
         if ( nodeIsP(node) &&
              node->type == StartTag &&
              ( (mode & Preformatted) ||
-               nodeIsDT(element) || 
+               nodeIsDT(element) ||
                DescendantOf(element, TidyTag_DT )
              )
            )
@@ -1753,7 +1842,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
         {
             TY_(ConstrainVersion)( doc, ~VERS_HTML40_STRICT );
             TY_(InsertNodeAtEnd)(element, node);
-            (*node->tag->parser)( doc, node, mode );
+            (*node->tag->parser)( doc, node, mode, no );
             continue;
         }
 
@@ -1816,7 +1905,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
                         in_parse_inline--;
                         SPRTF("Exit ParseInline 4 %d...\n",in_parse_inline);
 #endif
-                        return; /* close <i>, but will re-open it, after </b> */
+                        return NULL; /* close <i>, but will re-open it, after </b> */
                     }
                 }
                 TY_(PopInline)( doc, element );
@@ -1840,7 +1929,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
                     in_parse_inline--;
                     SPRTF("Exit ParseInline 5 %d...\n",in_parse_inline);
 #endif
-                    return;
+                    return NULL;
                 }
 
                 /* if parent is <a> then discard unexpected inline end tag */
@@ -1857,7 +1946,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
                 in_parse_inline--;
                 SPRTF("Exit ParseInline 6 %d...\n",in_parse_inline);
 #endif
-                return;
+                return NULL;
             }
         }
 
@@ -1883,7 +1972,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
             in_parse_inline--;
             SPRTF("Exit ParseInline 7 %d...\n",in_parse_inline);
 #endif
-            return;
+            return NULL;
         }
 
         /*
@@ -1892,7 +1981,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
         */
         /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */
         /* if (node->tag == doc->tags.tag_a && !node->implicit && TY_(IsPushed)(doc, node)) */
-        if ( nodeIsA(node) && !node->implicit && 
+        if ( nodeIsA(node) && !node->implicit &&
              (nodeIsA(element) || DescendantOf(element, TidyTag_A)) )
         {
             /* coerce <a> to </a> unless it has some attributes */
@@ -1920,7 +2009,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
             in_parse_inline--;
             SPRTF("Exit ParseInline 8 %d...\n",in_parse_inline);
 #endif
-            return;
+            return NULL;
         }
 
         if (element->tag->model & CM_HEADING)
@@ -2021,7 +2110,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
         }
 
 
-        /* 
+        /*
           if this is the end tag for an ancestor element
           then infer end tag for this element
         */
@@ -2035,7 +2124,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
                     if (!(element->tag->model & CM_OPT) && !element->implicit)
                         TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE);
 
-                    if( TY_(IsPushedLast)( doc, element, node ) ) 
+                    if( TY_(IsPushedLast)( doc, element, node ) )
                         TY_(PopInline)( doc, element );
                     TY_(UngetToken)( doc );
 
@@ -2046,13 +2135,13 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
                     in_parse_inline--;
                     SPRTF("Exit ParseInline 9 %d...\n",in_parse_inline);
 #endif
-                    return;
+                    return NULL;
                 }
             }
         }
 
         /*\
-         *  block level tags end this element 
+         *  block level tags end this element
          *  Issue #333 - There seems an exception if the element is a 'span',
          *  and the node just collected is a 'meta'. The 'meta' can not have
          *  CM_INLINE added, nor can the 'span' have CM_MIXED added without
@@ -2098,7 +2187,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
                     in_parse_inline--;
                     SPRTF("Exit ParseInline 10 %d...\n",in_parse_inline);
 #endif
-                    return;
+                    return NULL;
                 }
             }
 
@@ -2111,7 +2200,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
             in_parse_inline--;
             SPRTF("Exit ParseInline 11 %d...\n",in_parse_inline);
 #endif
-            return;
+            return NULL;
         }
 
         /* parse inline element */
@@ -2123,7 +2212,7 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
             /* trim white space before <br> */
             if ( nodeIsBR(node) )
                 TrimSpaces(doc, element);
-            
+
             TY_(InsertNodeAtEnd)(element, node);
             ParseTag(doc, node, mode);
             continue;
@@ -2142,36 +2231,20 @@ TY_PRIVATE void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode
     in_parse_inline--;
     SPRTF("Exit ParseInline 12 %d...\n",in_parse_inline);
 #endif
+    return NULL;
 }
 
-void TY_(ParseEmpty)(TidyDocImpl* doc, Node *element, GetTokenMode mode)
-{
-    Lexer* lexer = doc->lexer;
-    if ( lexer->isvoyager )
-    {
-        Node *node = TY_(GetToken)( doc, mode);
-        if ( node )
-        {
-            if ( !(node->type == EndTag && node->tag == element->tag) )
-            {
-                /* TY_(Report)(doc, element, node, ELEMENT_NOT_EMPTY); */
-                TY_(UngetToken)( doc );
-            }
-            else
-            {
-                TY_(FreeNode)( doc, node );
-            }
-        }
-    }
-}
 
-void TY_(ParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode)
+/** MARK: TY_(oldParseDefList)
+ *  Parses the `dl` tag.
+ */
+void* TY_(oldParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode)
 {
     Lexer* lexer = doc->lexer;
     Node *node, *parent;
 
     if (list->tag->model & CM_EMPTY)
-        return;
+        return NULL;
 
     lexer->insert = NULL;  /* defer implicit inline start tags */
 
@@ -2181,7 +2254,7 @@ void TY_(ParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode)
         {
             TY_(FreeNode)( doc, node);
             list->closed = yes;
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
@@ -2202,7 +2275,7 @@ void TY_(ParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode)
             continue;
         }
 
-        /* 
+        /*
           if this is the end tag for an ancestor element
           then infer end tag for this element
         */
@@ -2233,7 +2306,7 @@ void TY_(ParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode)
                     TY_(Report)(doc, list, node, MISSING_ENDTAG_BEFORE);
 
                     TY_(UngetToken)( doc );
-                    return;
+                    return NULL;
                 }
             }
             if (discardIt)
@@ -2287,12 +2360,12 @@ void TY_(ParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode)
             if (!(node->tag->model & (CM_BLOCK | CM_INLINE)))
             {
                 TY_(Report)(doc, list, node, TAG_NOT_ALLOWED_IN);
-                return;
+                return NULL;
             }
 
             /* if DD appeared directly in BODY then exclude blocks */
             if (!(node->tag->model & CM_INLINE) && lexer->excludeBlocks)
-                return;
+                return NULL;
 
             node = TY_(InferredTag)(doc, TidyTag_DD);
             TY_(Report)(doc, list, node, MISSING_STARTTAG);
@@ -2304,27 +2377,21 @@ void TY_(ParseDefList)(TidyDocImpl* doc, Node *list, GetTokenMode mode)
             TY_(FreeNode)( doc, node);
             continue;
         }
-        
+
         /* node should be <DT> or <DD>*/
         TY_(InsertNodeAtEnd)(list, node);
         ParseTag( doc, node, IgnoreWhitespace);
     }
 
     TY_(Report)(doc, list, node, MISSING_ENDTAG_FOR);
+    return NULL;
 }
 
-static Bool FindLastLI( Node *list, Node **lastli )
-{
-    Node *node;
 
-    *lastli = NULL;
-    for ( node = list->content; node ; node = node->next )
-        if ( nodeIsLI(node) && node->type == StartTag )
-            *lastli=node;
-    return *lastli ? yes:no;
-}
-
-void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
+/** MARK: TY_(oldParseList)
+ *  Parses list tags.
+ */
+void* TY_(oldParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
 {
 #if defined(ENABLE_DEBUG_LOG)
     static int in_parse_list = 0;
@@ -2344,7 +2411,7 @@ void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
         in_parse_list--;
         SPRTF("Exit ParseList 1 %d... CM_EMPTY\n",in_parse_list);
 #endif
-        return;
+        return NULL;
     }
     lexer->insert = NULL;  /* defer implicit inline start tags */
 
@@ -2359,7 +2426,7 @@ void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
             in_parse_list--;
             SPRTF("Exit ParseList 2 %d... Endtag\n",in_parse_list);
 #endif
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
@@ -2388,7 +2455,7 @@ void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
         }
 
 
-        /* 
+        /*
           if this is the end tag for an ancestor element
           then infer end tag for this element
         */
@@ -2426,7 +2493,7 @@ void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
                     in_parse_list--;
                     SPRTF("Exit ParseList 3 %d... No End Tag\n",in_parse_list);
 #endif
-                    return;
+                    return NULL;
                 }
             }
 
@@ -2459,7 +2526,7 @@ void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
                 in_parse_list--;
                 SPRTF("Exit ParseList 4 %d... No End Tag\n",in_parse_list);
 #endif
-                return;
+                return NULL;
             }
             /* http://tidy.sf.net/issue/1316307 */
             /* In exiled mode, return so table processing can continue. */
@@ -2471,12 +2538,12 @@ void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
                 in_parse_list--;
                 SPRTF("Exit ParseList 5 %d... exiled\n",in_parse_list);
 #endif
-                return;
+                return NULL;
             }
             /* http://tidy.sf.net/issue/836462
-               If "list" is an unordered list, insert the next tag within 
-               the last <li> to preserve the numbering to match the visual 
-               rendering of most browsers. */    
+               If "list" is an ordered list, insert the next tag within
+               the last <li> to preserve the numbering to match the visual
+               rendering of most browsers. */
             if ( nodeIsOL(list) && FindLastLI(list, &lastli) )
             {
                 /* Create a node for error reporting */
@@ -2490,12 +2557,12 @@ void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
                 /* Add an inferred <li> */
                 wasblock = TY_(nodeHasCM)(node,CM_BLOCK);
                 node = TY_(InferredTag)(doc, TidyTag_LI);
-                /* Add "display: inline" to avoid a blank line after <li> with 
+                /* Add "display: inline" to avoid a blank line after <li> with
                    Internet Explorer. See http://tidy.sf.net/issue/836462 */
                 TY_(AddStyleProperty)( doc, node,
                                        wasblock
                                        ? "list-style: none; display: inline"
-                                       : "list-style: none" 
+                                       : "list-style: none"
                                        );
                 TY_(Report)(doc, list, node, MISSING_STARTTAG );
                 TY_(InsertNodeAtEnd)(list,node);
@@ -2510,56 +2577,21 @@ void TY_(ParseList)(TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode))
     in_parse_list--;
     SPRTF("Exit ParseList 6 %d... missing end tag\n",in_parse_list);
 #endif
+    return NULL;
 }
 
-/*
- unexpected content in table row is moved to just before
- the table in accordance with Netscape and IE. This code
- assumes that node hasn't been inserted into the row.
-*/
-static void MoveBeforeTable( TidyDocImpl* ARG_UNUSED(doc), Node *row,
-                             Node *node )
-{
-    Node *table;
 
-    /* first find the table element */
-    for (table = row->parent; table; table = table->parent)
-    {
-        if ( nodeIsTABLE(table) )
-        {
-            TY_(InsertNodeBeforeElement)( table, node );
-            return;
-        }
-    }
-    /* No table element */
-    TY_(InsertNodeBeforeElement)( row->parent, node );
-}
-
-/*
- if a table row is empty then insert an empty cell
- this practice is consistent with browser behavior
- and avoids potential problems with row spanning cells
-*/
-static void FixEmptyRow(TidyDocImpl* doc, Node *row)
-{
-    Node *cell;
-
-    if (row->content == NULL)
-    {
-        cell = TY_(InferredTag)(doc, TidyTag_TD);
-        TY_(InsertNodeAtEnd)(row, cell);
-        TY_(Report)(doc, row, cell, MISSING_STARTTAG);
-    }
-}
-
-void TY_(ParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode))
+/** MARK: TY_(oldParseRow)
+ *  Parses the `tr` (table row) tag.
+ */
+void* TY_(oldParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode))
 {
     Lexer* lexer = doc->lexer;
     Node *node;
     Bool exclude_state;
 
     if (row->tag->model & CM_EMPTY)
-        return;
+        return NULL;
 
     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
     {
@@ -2570,16 +2602,16 @@ void TY_(ParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode))
                 TY_(FreeNode)( doc, node);
                 row->closed = yes;
                 FixEmptyRow( doc, row);
-                return;
+                return NULL;
             }
 
             /* New row start implies end of current row */
             TY_(UngetToken)( doc );
             FixEmptyRow( doc, row);
-            return;
+            return NULL;
         }
 
-        /* 
+        /*
           if this is the end tag for an ancestor element
           then infer end tag for this element
         */
@@ -2589,7 +2621,7 @@ void TY_(ParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode))
                  && DescendantOf(row, TagId(node)) )
             {
                 TY_(UngetToken)( doc );
-                return;
+                return NULL;
             }
 
             if ( nodeIsFORM(node) || TY_(nodeHasCM)(node, CM_BLOCK|CM_INLINE) )
@@ -2634,7 +2666,7 @@ void TY_(ParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode))
         if ( TY_(nodeHasCM)(node, CM_ROWGRP) )
         {
             TY_(UngetToken)( doc );
-            return;
+            return NULL;
         }
 
         if (node->type == EndTag)
@@ -2687,7 +2719,7 @@ void TY_(ParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode))
             TY_(FreeNode)( doc, node);
             continue;
         }
-        
+
         /* node should be <TD> or <TH> */
         TY_(InsertNodeAtEnd)(row, node);
         exclude_state = lexer->excludeBlocks;
@@ -2700,16 +2732,20 @@ void TY_(ParseRow)(TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode))
         while ( lexer->istacksize > lexer->istackbase )
             TY_(PopInline)( doc, NULL );
     }
-
+    return NULL;
 }
 
-void TY_(ParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSED(mode))
+
+/** MARK: TY_(oldParseRowGroup)
+ *  Parses the table row group tags (`thead`, `tbody`, `tfoot`).
+ */
+void* TY_(oldParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSED(mode))
 {
     Lexer* lexer = doc->lexer;
     Node *node, *parent;
 
     if (rowgroup->tag->model & CM_EMPTY)
-        return;
+        return NULL;
 
     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
     {
@@ -2719,18 +2755,18 @@ void TY_(ParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSE
             {
                 rowgroup->closed = yes;
                 TY_(FreeNode)( doc, node);
-                return;
+                return NULL;
             }
 
             TY_(UngetToken)( doc );
-            return;
+            return NULL;
         }
 
         /* if </table> infer end tag */
         if ( nodeIsTABLE(node) && node->type == EndTag )
         {
             TY_(UngetToken)( doc );
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
@@ -2780,7 +2816,7 @@ void TY_(ParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSE
             }
         }
 
-        /* 
+        /*
           if this is the end tag for ancestor element
           then infer end tag for this element
         */
@@ -2810,7 +2846,7 @@ void TY_(ParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSE
                 if (node->tag == parent->tag)
                 {
                     TY_(UngetToken)( doc );
-                    return;
+                    return NULL;
                 }
             }
         }
@@ -2824,7 +2860,7 @@ void TY_(ParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSE
             if (node->type != EndTag)
             {
                 TY_(UngetToken)( doc );
-                return;
+                return NULL;
             }
         }
 
@@ -2834,7 +2870,7 @@ void TY_(ParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSE
             TY_(FreeNode)( doc, node);
             continue;
         }
-        
+
         if ( !nodeIsTR(node) )
         {
             node = TY_(InferredTag)(doc, TidyTag_TR);
@@ -2846,15 +2882,19 @@ void TY_(ParseRowGroup)(TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSE
         TY_(InsertNodeAtEnd)(rowgroup, node);
         ParseTag(doc, node, IgnoreWhitespace);
     }
-
+    return NULL;
 }
 
-void TY_(ParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSED(mode))
+
+/** MARK: TY_(oldParseColGroup)
+ *  Parses the `colgroup` tag.
+ */
+void* TY_(oldParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSED(mode))
 {
     Node *node, *parent;
 
     if (colgroup->tag->model & CM_EMPTY)
-        return;
+        return NULL;
 
     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
     {
@@ -2862,10 +2902,10 @@ void TY_(ParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSE
         {
             TY_(FreeNode)( doc, node);
             colgroup->closed = yes;
-            return;
+            return NULL;
         }
 
-        /* 
+        /*
           if this is the end tag for an ancestor element
           then infer end tag for this element
         */
@@ -2886,7 +2926,7 @@ void TY_(ParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSE
                 if (node->tag == parent->tag)
                 {
                     TY_(UngetToken)( doc );
-                    return;
+                    return NULL;
                 }
             }
         }
@@ -2894,7 +2934,7 @@ void TY_(ParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSE
         if (TY_(nodeIsText)(node))
         {
             TY_(UngetToken)( doc );
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
@@ -2912,7 +2952,7 @@ void TY_(ParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSE
         if ( !nodeIsCOL(node) )
         {
             TY_(UngetToken)( doc );
-            return;
+            return NULL;
         }
 
         if (node->type == EndTag)
@@ -2921,14 +2961,19 @@ void TY_(ParseColGroup)(TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSE
             TY_(FreeNode)( doc, node);
             continue;
         }
-        
+
         /* node should be <COL> */
         TY_(InsertNodeAtEnd)(colgroup, node);
         ParseTag(doc, node, IgnoreWhitespace);
     }
+    return NULL;
 }
 
-void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(mode))
+
+/** MARK: TY_(oldParseTableTag)
+ *  Parses the `table` tag.
+ */
+void* TY_(oldParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(mode))
 {
 #if defined(ENABLE_DEBUG_LOG)
     static int in_parse_table = 0;
@@ -2944,7 +2989,7 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m
     in_parse_table++;
     SPRTF("Entering ParseTableTag %d...\n",in_parse_table);
 #endif
-    
+
     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
     {
         if (node->tag == table->tag )
@@ -2956,7 +3001,7 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m
             else
             {
                 /* Issue #498 - If a <table> in a <table>
-                 * just close the current table, and issue a 
+                 * just close the current table, and issue a
                  * warning. The previous action was to discard
                  * this second <table>
                  */
@@ -2969,7 +3014,7 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m
             in_parse_table--;
             SPRTF("Exit ParseTableTag 1 %d... EndTag\n",in_parse_table);
 #endif
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
@@ -3000,7 +3045,7 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m
                 TY_(Report)(doc, table, node, TAG_NOT_ALLOWED_IN);
                 lexer->exiled = yes;
 
-                if (node->type != TextNode) 
+                if (node->type != TextNode)
                     ParseTag(doc, node, IgnoreWhitespace);
 
                 lexer->exiled = no;
@@ -3013,7 +3058,7 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m
             }
         }
 
-        /* 
+        /*
           if this is the end tag for an ancestor element
           then infer end tag for this element
         */
@@ -3049,7 +3094,7 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m
                     in_parse_table--;
                     SPRTF("Exit ParseTableTag 2 %d... missing EndTag\n",in_parse_table);
 #endif
-                    return;
+                    return NULL;
                 }
             }
         }
@@ -3063,7 +3108,7 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m
             in_parse_table--;
             SPRTF("Exit ParseTableTag 3 %d... CM_TABLE\n",in_parse_table);
 #endif
-            return;
+            return NULL;
         }
 
         if (TY_(nodeIsElement)(node))
@@ -3084,35 +3129,25 @@ void TY_(ParseTableTag)(TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(m
     in_parse_table--;
     SPRTF("Exit ParseTableTag 4 %d... missing end\n",in_parse_table);
 #endif
+    return NULL;
 }
 
-/* acceptable content for pre elements */
-static Bool PreContent( TidyDocImpl* ARG_UNUSED(doc), Node* node )
-{
-    /* p is coerced to br's, Text OK too */
-    if ( nodeIsP(node) || TY_(nodeIsText)(node) )
-        return yes;
 
-    if ( node->tag == NULL ||
-         nodeIsPARAM(node) ||
-         !TY_(nodeHasCM)(node, CM_INLINE|CM_NEW) )
-        return no;
-
-    return yes;
-}
-
-void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
+/** MARK: TY_(oldParsePre)
+ *  Parses the `pre` tag.
+ */
+void* TY_(oldParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
 {
     Node *node;
 
     if (pre->tag->model & CM_EMPTY)
-        return;
+        return NULL;
 
     TY_(InlineDup)( doc, NULL ); /* tell lexer to insert inlines if needed */
 
     while ((node = TY_(GetToken)(doc, Preformatted)) != NULL)
     {
-        if ( node->type == EndTag && 
+        if ( node->type == EndTag &&
              (node->tag == pre->tag || DescendantOf(pre, TagId(node))) )
         {
             if (nodeIsBODY(node) || nodeIsHTML(node))
@@ -3132,7 +3167,7 @@ void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
             }
             pre->closed = yes;
             TrimSpaces(doc, pre);
-            return;
+            return NULL;
         }
 
         if (TY_(nodeIsText)(node))
@@ -3160,13 +3195,13 @@ void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
             /* fix for http://tidy.sf.net/bug/772205 */
             if (node->type == EndTag)
             {
-                /* http://tidy.sf.net/issue/1590220 */ 
+                /* http://tidy.sf.net/issue/1590220 */
                if ( doc->lexer->exiled
                    && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) )
                {
                   TY_(UngetToken)(doc);
                   TrimSpaces(doc, pre);
-                  return;
+                  return NULL;
                }
 
                TY_(Report)(doc, pre, node, DISCARDING_UNEXPECTED);
@@ -3182,7 +3217,7 @@ void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
                     TY_(Report)(doc, pre, node, MISSING_ENDTAG_BEFORE);
 
                 TY_(UngetToken)(doc);
-                return;
+                return NULL;
             }
 
             /*
@@ -3211,7 +3246,7 @@ void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
                 <pre>...<br>...<br>...</pre>         (Internet Explorer)
                 <pre>...<br><br>...<br><br>...</pre> (Mozilla, Opera 6)
                 <pre>...<br>...<br><br>...</pre>     (Opera 7)
-                
+
               or something similar, they could also be closing the <pre> and propagate
               the <pre> into the newly opened <p>.
 
@@ -3242,7 +3277,7 @@ void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
 
                 /* trim white space before <p> in <pre>*/
                 TrimSpaces(doc, pre);
-            
+
                 /* coerce both <p> and </p> to <br> */
                 TY_(CoerceNode)(doc, node, TidyTag_BR, no, no);
                 TY_(FreeAttrs)( doc, node ); /* discard align attribute etc. */
@@ -3261,7 +3296,7 @@ void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
             /* trim white space before <br> */
             if ( nodeIsBR(node) )
                 TrimSpaces(doc, pre);
-            
+
             TY_(InsertNodeAtEnd)(pre, node);
             ParseTag(doc, node, Preformatted);
             continue;
@@ -3273,9 +3308,14 @@ void TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode) )
     }
 
     TY_(Report)(doc, pre, node, MISSING_ENDTAG_FOR);
+    return NULL;
 }
 
-void TY_(ParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
+
+/** MARK: TY_(oldParseOptGroup)
+ *  Parses the `optgroup` tag.
+ */
+void* TY_(oldParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
 {
     Lexer* lexer = doc->lexer;
     Node *node;
@@ -3289,14 +3329,14 @@ void TY_(ParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
             TY_(FreeNode)( doc, node);
             field->closed = yes;
             TrimSpaces(doc, field);
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
         if (InsertMisc(field, node))
             continue;
 
-        if ( node->type == StartTag && 
+        if ( node->type == StartTag &&
              (nodeIsOPTION(node) || nodeIsOPTGROUP(node)) )
         {
             if ( nodeIsOPTGROUP(node) )
@@ -3311,10 +3351,14 @@ void TY_(ParseOptGroup)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
         TY_(Report)(doc, field, node, DISCARDING_UNEXPECTED );
         TY_(FreeNode)( doc, node);
     }
+    return NULL;
 }
 
 
-void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
+/** MARK: TY_(oldParseSelect)
+ *  Parses the `select` tag.
+ */
+void* TY_(oldParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
 {
 #if defined(ENABLE_DEBUG_LOG)
     static int in_parse_select = 0;
@@ -3339,18 +3383,18 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
             in_parse_select--;
             SPRTF("Exit ParseSelect 1 %d...\n",in_parse_select);
 #endif
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
         if (InsertMisc(field, node))
             continue;
 
-        if ( node->type == StartTag && 
+        if ( node->type == StartTag &&
              ( nodeIsOPTION(node)   ||
                nodeIsOPTGROUP(node) ||
                nodeIsDATALIST(node) ||
-               nodeIsSCRIPT(node)) 
+               nodeIsSCRIPT(node))
            )
         {
             TY_(InsertNodeAtEnd)(field, node);
@@ -3368,10 +3412,14 @@ void TY_(ParseSelect)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mod
     in_parse_select--;
     SPRTF("Exit ParseSelect 2 %d...\n",in_parse_select);
 #endif
+    return NULL;
 }
 
-/* HTML5 */
-void TY_(ParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
+
+/** MARK: TY_(oldParseDatalist)
+ *  Parses the `datalist` tag.
+ */
+void* TY_(oldParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode))
 {
 #if defined(ENABLE_DEBUG_LOG)
     static int in_parse_datalist = 0;
@@ -3396,18 +3444,18 @@ void TY_(ParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
             in_parse_datalist--;
             SPRTF("Exit ParseDatalist 1 %d...\n",in_parse_datalist);
 #endif
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
         if (InsertMisc(field, node))
             continue;
 
-        if ( node->type == StartTag && 
+        if ( node->type == StartTag &&
              ( nodeIsOPTION(node)   ||
                nodeIsOPTGROUP(node) ||
                nodeIsDATALIST(node) ||
-               nodeIsSCRIPT(node)) 
+               nodeIsSCRIPT(node))
            )
         {
             TY_(InsertNodeAtEnd)(field, node);
@@ -3425,12 +3473,14 @@ void TY_(ParseDatalist)(TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
     in_parse_datalist--;
     SPRTF("Exit ParseDatalist 2 %d...\n",in_parse_datalist);
 #endif
+    return NULL;
 }
 
 
-
-
-void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
+/** MARK: TY_(oldParseText)
+ *  Parses the `option` and `textarea` tags.
+ */
+void* TY_(oldParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
 {
     Lexer* lexer = doc->lexer;
     Node *node;
@@ -3449,7 +3499,7 @@ void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
             TY_(FreeNode)( doc, node);
             field->closed = yes;
             TrimSpaces(doc, field);
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
@@ -3475,7 +3525,7 @@ void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
         /* for textarea should all cases of < and & be escaped? */
 
         /* discard inline tags e.g. font */
-        if (   node->tag 
+        if (   node->tag
             && node->tag->model & CM_INLINE
             && !(node->tag->model & CM_FIELD)) /* #487283 - fix by Lee Passey 25 Jan 02 */
         {
@@ -3490,15 +3540,19 @@ void TY_(ParseText)(TidyDocImpl* doc, Node *field, GetTokenMode mode)
 
         TY_(UngetToken)( doc );
         TrimSpaces(doc, field);
-        return;
+        return NULL;
     }
 
     if (!(field->tag->model & CM_OPT))
         TY_(Report)(doc, field, node, MISSING_ENDTAG_FOR);
+    return NULL;
 }
 
 
-void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode))
+/** MARK: TY_(oldParseTitle)
+ *  Parses the `title` tag.
+ */
+void* TY_(oldParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode))
 {
     Node *node;
     while ((node = TY_(GetToken)(doc, MixedContent)) != NULL)
@@ -3516,7 +3570,7 @@ void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode
             TY_(FreeNode)( doc, node);
             title->closed = yes;
             TrimSpaces(doc, title);
-            return;
+            return NULL;
         }
 
         if (TY_(nodeIsText)(node))
@@ -3551,23 +3605,29 @@ void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode
         TY_(Report)(doc, title, node, MISSING_ENDTAG_BEFORE);
         TY_(UngetToken)( doc );
         TrimSpaces(doc, title);
-        return;
+        return NULL;
     }
 
     TY_(Report)(doc, title, node, MISSING_ENDTAG_FOR);
+    return NULL;
 }
 
-/*
-  This isn't quite right for CDATA content as it recognises
-  tags within the content and parses them accordingly.
-  This will unfortunately screw up scripts which include
-  < + letter,  < + !, < + ?  or  < + / + letter
-*/
 
-void TY_(ParseScript)(TidyDocImpl* doc, Node *script, GetTokenMode ARG_UNUSED(mode))
+/** MARK: TY_(oldParseScript)
+ *  Parses the `script` tag.
+ *
+ *  @todo This isn't quite right for CDATA content as it recognises tags
+ *  within the content and parses them accordingly. This will unfortunately
+ *  screw up scripts which include:
+ *    < + letter
+ *    < + !
+ *    < + ?
+ *    < + / + letter
+ */
+void* TY_(oldParseScript)(TidyDocImpl* doc, Node *script, GetTokenMode ARG_UNUSED(mode))
 {
     Node *node;
-    
+
     doc->lexer->parent = script;
     node = TY_(GetToken)(doc, CdataContent);
     doc->lexer->parent = NULL;
@@ -3580,7 +3640,7 @@ void TY_(ParseScript)(TidyDocImpl* doc, Node *script, GetTokenMode ARG_UNUSED(mo
     {
         /* handle e.g. a document like "<script>" */
         TY_(Report)(doc, script, NULL, MISSING_ENDTAG_FOR);
-        return;
+        return NULL;
     }
 
     node = TY_(GetToken)(doc, IgnoreWhitespace);
@@ -3597,30 +3657,14 @@ void TY_(ParseScript)(TidyDocImpl* doc, Node *script, GetTokenMode ARG_UNUSED(mo
     {
         TY_(FreeNode)(doc, node);
     }
+    return NULL;
 }
 
-Bool TY_(IsJavaScript)(Node *node)
-{
-    Bool result = no;
-    AttVal *attr;
 
-    if (node->attributes == NULL)
-        return yes;
-
-    for (attr = node->attributes; attr; attr = attr->next)
-    {
-        if ( (attrIsLANGUAGE(attr) || attrIsTYPE(attr))
-             && AttrContains(attr, "javascript") )
-        {
-            result = yes;
-            break;
-        }
-    }
-
-    return result;
-}
-
-void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
+/** MARK: TY_(oldParseHead)
+ *  Parses the `head` tag.
+ */
+void* TY_(oldParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
 {
     Lexer* lexer = doc->lexer;
     Node *node;
@@ -3686,7 +3730,7 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
             TY_(FreeNode)( doc, node);
             continue;
         }
-        
+
         /*
          if it doesn't belong in the head then
          treat as implicit end of head and deal
@@ -3732,50 +3776,14 @@ void TY_(ParseHead)(TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode))
         TY_(FreeNode)( doc, node);
     }
     DEBUG_LOG(SPRTF("Exit ParseHead 1...\n"));
-}
-
-/*\ 
- *  Issue #166 - repeated <main> element
- *  But this service is generalised to check for other duplicate elements
-\*/
-static Bool TY_(FindNodeWithId)( Node *node, TidyTagId tid )
-{
-    Node *content;
-    while (node)
-    {
-        if (TagIsId(node,tid))
-            return yes;
-        /*\ 
-         *   Issue #459 - Under certain circumstances, with many node this use of
-         *   'for (content = node->content; content; content = content->content)'
-         *   would produce a **forever** circle, or at least a very extended loop...
-         *   It is sufficient to test the content, if it exists,
-         *   to quickly iterate all nodes. Now all nodes are tested only once.
-        \*/ 
-        content = node->content;
-        if (content)
-        {
-            if (TY_(FindNodeWithId)(content,tid))
-                return yes;
-        }
-        node = node->next;
-    }
-    return no;
+    return NULL;
 }
 
 
-/*\ 
- *  Issue #166 - repeated <main> element
- *  Do a global search for an element
-\*/
-static Bool TY_(FindNodeById)( TidyDocImpl* doc, TidyTagId tid )
-{
-    Node *node = (doc ? doc->root.content : NULL);
-    return TY_(FindNodeWithId)(node,tid);
-}
-
-
-void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
+/** MARK: TY_(oldParseBody)
+ *  Parses the `body` tag.
+ */
+void* TY_(oldParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
 {
     Lexer* lexer = doc->lexer;
     Node *node;
@@ -3800,7 +3808,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
         /* #538536 Extra endtags not detected */
         if ( nodeIsHTML(node) )
         {
-            if (TY_(nodeIsElement)(node) || lexer->seenEndHtml) 
+            if (TY_(nodeIsElement)(node) || lexer->seenEndHtml)
                 TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
             else
                 lexer->seenEndHtml = 1;
@@ -3809,7 +3817,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
             continue;
         }
 
-        if ( lexer->seenEndBody && 
+        if ( lexer->seenEndBody &&
              ( node->type == StartTag ||
                node->type == EndTag   ||
                node->type == StartEndTag ) )
@@ -3836,7 +3844,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
             if (node->type == StartTag)
             {
                 TY_(InsertNodeAtEnd)(body, node);
-                TY_(ParseBlock)(doc, node, mode);
+                TY_(oldParseBlock)(doc, node, mode);
                 continue;
             }
 
@@ -3855,7 +3863,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
             TY_(UngetToken)( doc );
             break;
         }
-        
+
         iswhitenode = no;
 
         if ( TY_(nodeIsText)(node) &&
@@ -3912,7 +3920,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
           to match Netscape's observed behaviour.
         */
         lexer->excludeBlocks = no;
-        
+
         if (( nodeIsINPUT(node) ||
              (!TY_(nodeHasCM)(node, CM_BLOCK) && !TY_(nodeHasCM)(node, CM_INLINE))
            ) && !TY_(IsHTML5Mode)(doc) )
@@ -3924,7 +3932,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
             if (node->tag->model & CM_HTML)
             {
                 /* copy body attributes if current body was inferred */
-                if ( nodeIsBODY(node) && body->implicit 
+                if ( nodeIsBODY(node) && body->implicit
                      && body->attributes == NULL )
                 {
                     body->attributes = node->attributes;
@@ -3974,7 +3982,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
                 if ( !TY_(nodeHasCM)(node, CM_ROW | CM_FIELD) )
                 {
                     TY_(UngetToken)( doc );
-                    return;
+                    return NULL;
                 }
 
                 /* ignore </td> </th> <option> etc. */
@@ -3998,11 +4006,12 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
 
         if (TY_(nodeIsElement)(node))
         {
-            if (nodeIsMAIN(node)) {
+            if (nodeIsMAIN(node))
+            {
                 /*\ Issue #166 - repeated <main> element
                  *  How to efficiently search for a previous main element?
                 \*/
-                if ( TY_(FindNodeById)(doc, TidyTag_MAIN) )
+                if ( findNodeById(doc, TidyTag_MAIN) )
                 {
                     doc->badForm |= flg_BadMain; /* this is an ERROR in format */
                     TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
@@ -4051,9 +4060,14 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode)
         TY_(FreeNode)( doc, node);
     }
     DEBUG_LOG(SPRTF("Exit ParseBody 1...\n"));
+    return NULL;
 }
 
-void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
+
+/** MARK: TY_(oldParseNoFrames)
+ *  Parses the `noframes` tag.
+ */
+void* TY_(oldParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
 {
     Lexer* lexer = doc->lexer;
     Node *node;
@@ -4071,7 +4085,7 @@ void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
             TY_(FreeNode)( doc, node);
             noframes->closed = yes;
             TrimSpaces(doc, noframes);
-            return;
+            return NULL;
         }
 
         if ( nodeIsFRAME(node) || nodeIsFRAMESET(node) )
@@ -4087,7 +4101,7 @@ void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
                 TY_(Report)(doc, noframes, node, MISSING_ENDTAG_BEFORE);
                 TY_(UngetToken)( doc );
             }
-            return;
+            return NULL;
         }
 
         if ( nodeIsHTML(node) )
@@ -4157,9 +4171,14 @@ void TY_(ParseNoFrames)(TidyDocImpl* doc, Node *noframes, GetTokenMode mode)
     }
 
     TY_(Report)(doc, noframes, node, MISSING_ENDTAG_FOR);
+    return NULL;
 }
 
-void TY_(ParseFrameSet)(TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNUSED(mode))
+
+/** MARK: TY_(oldParseFrameSet)
+ *  Parses the `frameset` tag.
+ */
+void* TY_(oldParseFrameSet)(TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNUSED(mode))
 {
     Lexer* lexer = doc->lexer;
     Node *node;
@@ -4168,7 +4187,7 @@ void TY_(ParseFrameSet)(TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNUSE
     {
         doc->badAccess |= BA_USING_FRAMES;
     }
-    
+
     while ((node = TY_(GetToken)(doc, IgnoreWhitespace)) != NULL)
     {
         if (node->tag == frameset->tag && node->type == EndTag)
@@ -4176,7 +4195,7 @@ void TY_(ParseFrameSet)(TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNUSE
             TY_(FreeNode)( doc, node);
             frameset->closed = yes;
             TrimSpaces(doc, frameset);
-            return;
+            return NULL;
         }
 
         /* deal with comments etc. */
@@ -4187,7 +4206,7 @@ void TY_(ParseFrameSet)(TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNUSE
         {
             TY_(Report)(doc, frameset, node, DISCARDING_UNEXPECTED);
             TY_(FreeNode)( doc, node);
-            continue; 
+            continue;
         }
 
         if (TY_(nodeIsElement)(node))
@@ -4229,248 +4248,1764 @@ void TY_(ParseFrameSet)(TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNUSE
     }
 
     TY_(Report)(doc, frameset, node, MISSING_ENDTAG_FOR);
+    return NULL;
 }
 
-void TY_(ParseHTML)(TidyDocImpl* doc, Node *html, GetTokenMode mode)
+
+/***************************************************************************//*
+ ** MARK: - Parsers
+ ***************************************************************************/
+
+
+/** MARK: TY_(ParseHTML)
+ *  Parses the `html` tag. At this point, other root-level stuff (doctype,
+ *  comments) is already set up, and the bulk of the parsing can be
+ *  conducted from here as our nexus.
+ */
+Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode, Bool popStack )
 {
-    Node *node, *head;
+#if defined(ENABLE_DEBUG_LOG)
+    static int parser_depth = 0;
+    static int parser_count = 0;
+#endif
+    Node *node = NULL;
+    Node *head = NULL;
     Node *frameset = NULL;
     Node *noframes = NULL;
+    Bool keepToken = no;
+    parserState state = STATE_INITIAL;
+
+#if defined(ENABLE_DEBUG_LOG)
+    parser_depth++;
+    parser_count++;
+    DEBUG_LOG(SPRTF("***Entering ParseHTML, count: %d, depth %d\n", parser_count, parser_depth));
+#endif
 
-    DEBUG_LOG(SPRTF("Entering ParseHTML...\n"));
     TY_(SetOptionBool)( doc, TidyXmlTags, no );
 
-    for (;;)
+    /*
+     If we're re-entering, then we need to setup from a previous state,
+     instead of starting fresh. We can pull what we need from the document's
+     stack.
+     */
+    if ( popStack )
     {
-        node = TY_(GetToken)(doc, IgnoreWhitespace);
-
-        if (node == NULL)
-        {
-            node = TY_(InferredTag)(doc, TidyTag_HEAD);
-            break;
-        }
-
-        if ( nodeIsHEAD(node) )
-            break;
-
-        if (node->tag == html->tag && node->type == EndTag)
-        {
-            TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
-            TY_(FreeNode)( doc, node);
-            continue;
-        }
-
-        /* find and discard multiple <html> elements */
-        if (node->tag == html->tag && node->type == StartTag)
-        {
-            TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
-            TY_(FreeNode)(doc, node);
-            continue;
-        }
-
-        /* deal with comments etc. */
-        if (InsertMisc(html, node))
-            continue;
-
-        TY_(UngetToken)( doc );
-        node = TY_(InferredTag)(doc, TidyTag_HEAD);
-        break;
+        TidyParserMemory memory = popMemory( doc );
+        node = memory.reentry_node;
+        mode = memory.reentry_mode;
+        state = memory.reentry_state;
+        html = memory.original_node;
+        keepToken = node != NULL;
     }
 
-    head = node;
-    TY_(InsertNodeAtEnd)(html, head);
-    TY_(ParseHead)(doc, head, mode);
-
-    for (;;)
+    /*
+     This main loop pulls tokens from the lexer until we're out of tokens,
+     or until there's no more work to do.
+     */
+    while ( state != STATE_COMPLETE )
     {
-        node = TY_(GetToken)(doc, IgnoreWhitespace);
+        /*
+         We don't want to get the next token unless we're
+         done with this one. Using this flag is much quicker
+         than using `UngetToken()` every time we want to keep
+         the token.
+         */
+        if ( !keepToken )
+            node = TY_(GetToken)( doc, IgnoreWhitespace );
+        keepToken = no;
 
-        if (node == NULL)
+        switch ( state )
         {
-            if (frameset == NULL) /* implied body */
+            /**************************************************************
+             This case is all about finding a head tag and dealing with
+             cases where we don't, so that we can move on to parsing a head
+             tag.
+             **************************************************************/
+            case STATE_INITIAL:
             {
-                node = TY_(InferredTag)(doc, TidyTag_BODY);
-                TY_(InsertNodeAtEnd)(html, node);
-                TY_(ParseBody)(doc, node, mode);
-            }
-
-            DEBUG_LOG(SPRTF("Exit ParseHTML 1...\n"));
-            return;
-        }
-
-        /* robustly handle html tags */
-        if (node->tag == html->tag)
-        {
-            if (node->type != StartTag && frameset == NULL)
-                TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
-
-            TY_(FreeNode)( doc, node);
-            continue;
-        }
-
-        /* deal with comments etc. */
-        if (InsertMisc(html, node))
-            continue;
-
-        /* if frameset document coerce <body> to <noframes> */
-        if ( nodeIsBODY(node) )
-        {
-            if (node->type != StartTag)
-            {
-                TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
-                TY_(FreeNode)( doc, node);
-                continue;
-            }
-
-            if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
-            {
-                if (frameset != NULL)
+                /*
+                 The only way we can possibly be here is if the lexer
+                 had nothing to give us. Thus we'll create our own
+                 head, and set the signal to start parsing it.
+                 */
+                if (node == NULL)
                 {
-                    TY_(UngetToken)( doc );
+                    node = TY_(InferredTag)(doc, TidyTag_HEAD);
+                    state = STATE_PARSE_HEAD;
+                    keepToken = yes;
+                    continue;
+                }
 
+                /* We found exactly what we expected: head. */
+                if ( nodeIsHEAD(node) )
+                {
+                    state = STATE_PARSE_HEAD;
+                    keepToken = yes;
+                    continue;
+                }
+
+                /* We did not expect to find an html closing tag here! */
+                if (node->tag == html->tag && node->type == EndTag)
+                {
+                    TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
+                    TY_(FreeNode)( doc, node);
+                    continue;
+                }
+
+                /* Find and discard multiple <html> elements. */
+                if (node->tag == html->tag && node->type == StartTag)
+                {
+                    TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
+                    TY_(FreeNode)(doc, node);
+                    continue;
+                }
+
+                /* Deal with comments etc. */
+                if (InsertMisc(html, node))
+                    continue;
+
+                /* At this point, we didn't find a head tag, so put the
+                 token back and create our own head tag, so we can
+                 move on.
+                 */
+                TY_(UngetToken)( doc );
+                node = TY_(InferredTag)(doc, TidyTag_HEAD);
+                state = STATE_PARSE_HEAD;
+                keepToken = yes;
+                continue;
+            } break;
+
+
+            /**************************************************************
+             This case determines whether we're dealing with body or
+             frameset + noframes, and sets things up accordingly.
+             **************************************************************/
+            case STATE_PRE_BODY:
+            {
+                if (node == NULL )
+                {
+                    if (frameset == NULL) /* Implied body. */
+                    {
+                        node = TY_(InferredTag)(doc, TidyTag_BODY);
+                        state = STATE_PARSE_BODY;
+                        keepToken = yes;
+                    } else {
+                        state = STATE_COMPLETE;
+                    }
+
+                    continue;
+                }
+
+                /* Robustly handle html tags. */
+                if (node->tag == html->tag)
+                {
+                    if (node->type != StartTag && frameset == NULL)
+                        TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
+
+                    TY_(FreeNode)( doc, node);
+                    continue;
+                }
+
+                /* Deal with comments etc. */
+                if (InsertMisc(html, node))
+                    continue;
+
+                /* If frameset document, coerce <body> to <noframes> */
+                if ( nodeIsBODY(node) )
+                {
+                    if (node->type != StartTag)
+                    {
+                        TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
+                        TY_(FreeNode)( doc, node);
+                        continue;
+                    }
+
+                    if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
+                    {
+                        if (frameset != NULL)
+                        {
+                            TY_(UngetToken)( doc );
+
+                            if (noframes == NULL)
+                            {
+                                noframes = TY_(InferredTag)(doc, TidyTag_NOFRAMES);
+                                TY_(InsertNodeAtEnd)(frameset, noframes);
+                                TY_(Report)(doc, html, noframes, INSERTING_TAG);
+                            }
+                            else
+                            {
+                                if (noframes->type == StartEndTag)
+                                    noframes->type = StartTag;
+                            }
+
+                            state = STATE_PARSE_NOFRAMES;
+                            keepToken = yes;
+                            continue;
+                        }
+                    }
+
+                    TY_(ConstrainVersion)(doc, ~VERS_FRAMESET);
+                    state = STATE_PARSE_BODY;
+                    keepToken = yes;
+                    continue;
+                }
+
+                /* Flag an error if we see more than one frameset. */
+                if ( nodeIsFRAMESET(node) )
+                {
+                    if (node->type != StartTag)
+                    {
+                        TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
+                        TY_(FreeNode)( doc, node);
+                        continue;
+                    }
+
+                    if (frameset != NULL)
+                        TY_(Report)(doc, html, node, DUPLICATE_FRAMESET);
+                    else
+                        frameset = node;
+
+                    state = STATE_PARSE_FRAMESET;
+                    keepToken = yes;
+                    continue;
+                }
+
+                /* If not a frameset document coerce <noframes> to <body>. */
+                if ( nodeIsNOFRAMES(node) )
+                {
+                    if (node->type != StartTag)
+                    {
+                        TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
+                        TY_(FreeNode)( doc, node);
+                        continue;
+                    }
+
+                    if (frameset == NULL)
+                    {
+                        TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
+                        TY_(FreeNode)( doc, node);
+                        node = TY_(InferredTag)(doc, TidyTag_BODY);
+                        state = STATE_PARSE_BODY;
+                        keepToken = yes;
+                        continue;
+                    }
+
+                    if (noframes == NULL)
+                    {
+                        noframes = node;
+                        TY_(InsertNodeAtEnd)(frameset, noframes);
+                        state = STATE_PARSE_NOFRAMES;
+                        keepToken = yes;
+                    }
+                    else
+                    {
+                        TY_(FreeNode)( doc, node);
+                    }
+
+                    continue;
+                }
+
+                /* Deal with some other element that we're not expecting. */
+                if (TY_(nodeIsElement)(node))
+                {
+                    if (node->tag && node->tag->model & CM_HEAD)
+                    {
+                        MoveToHead(doc, html, node);
+                        continue;
+                    }
+
+                    /* Discard illegal frame element following a frameset. */
+                    if ( frameset != NULL && nodeIsFRAME(node) )
+                    {
+                        TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
+                        TY_(FreeNode)(doc, node);
+                        continue;
+                    }
+                }
+
+                TY_(UngetToken)( doc );
+
+                /* Insert other content into noframes element. */
+                if (frameset)
+                {
                     if (noframes == NULL)
                     {
                         noframes = TY_(InferredTag)(doc, TidyTag_NOFRAMES);
                         TY_(InsertNodeAtEnd)(frameset, noframes);
-                        TY_(Report)(doc, html, noframes, INSERTING_TAG);
                     }
                     else
                     {
+                        TY_(Report)(doc, html, node, NOFRAMES_CONTENT);
                         if (noframes->type == StartEndTag)
                             noframes->type = StartTag;
                     }
 
-                    ParseTag(doc, noframes, mode);
+                    TY_(ConstrainVersion)(doc, VERS_FRAMESET);
+                    state = STATE_PARSE_NOFRAMES;
+                    keepToken = yes;
                     continue;
                 }
-            }
 
-            TY_(ConstrainVersion)(doc, ~VERS_FRAMESET);
-            break;  /* to parse body */
-        }
-
-        /* flag an error if we see more than one frameset */
-        if ( nodeIsFRAMESET(node) )
-        {
-            if (node->type != StartTag)
-            {
-                TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
-                TY_(FreeNode)( doc, node);
-                continue;
-            }
-
-            if (frameset != NULL)
-                TY_(Report)(doc, html, node, DUPLICATE_FRAMESET);
-            else
-                frameset = node;
-
-            TY_(InsertNodeAtEnd)(html, node);
-            ParseTag(doc, node, mode);
-
-            /*
-              see if it includes a noframes element so
-              that we can merge subsequent noframes elements
-            */
-
-            for (node = frameset->content; node; node = node->next)
-            {
-                if ( nodeIsNOFRAMES(node) )
-                    noframes = node;
-            }
-            continue;
-        }
-
-        /* if not a frameset document coerce <noframes> to <body> */
-        if ( nodeIsNOFRAMES(node) )
-        {
-            if (node->type != StartTag)
-            {
-                TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
-                TY_(FreeNode)( doc, node);
-                continue;
-            }
-
-            if (frameset == NULL)
-            {
-                TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
-                TY_(FreeNode)( doc, node);
                 node = TY_(InferredTag)(doc, TidyTag_BODY);
-                break;
-            }
 
-            if (noframes == NULL)
-            {
-                noframes = node;
-                TY_(InsertNodeAtEnd)(frameset, noframes);
-            }
-            else
-                TY_(FreeNode)( doc, node);
+                /* Issue #132 - disable inserting BODY tag warning
+                 BUT only if NOT --show-body-only yes */
+                if (!showingBodyOnly(doc))
+                    TY_(Report)(doc, html, node, INSERTING_TAG );
 
-            ParseTag(doc, noframes, mode);
-            continue;
-        }
-
-        if (TY_(nodeIsElement)(node))
-        {
-            if (node->tag && node->tag->model & CM_HEAD)
-            {
-                MoveToHead(doc, html, node);
+                TY_(ConstrainVersion)(doc, ~VERS_FRAMESET);
+                state = STATE_PARSE_BODY;
+                keepToken = yes;
                 continue;
-            }
+            } break;
 
-            /* discard illegal frame element following a frameset */
-            if ( frameset != NULL && nodeIsFRAME(node) )
+
+            /**************************************************************
+             In this case, we're ready to parse the head, and move on to
+             look for the body or body alternative.
+             **************************************************************/
+            case STATE_PARSE_HEAD:
             {
-                TY_(Report)(doc, html, node, DISCARDING_UNEXPECTED);
-                TY_(FreeNode)(doc, node);
-                continue;
-            }
-        }
+                TidyParserMemory memory;
+                memory.identity = TY_(ParseHTML);
+                memory.mode = mode;
+                memory.original_node = html;
+                memory.reentry_node = node;
+                memory.reentry_mode = mode;
+                memory.reentry_state = STATE_PARSE_HEAD_DONE;
+                TY_(InsertNodeAtEnd)(html, node);
+                pushMemory( doc, memory );
+#if defined(ENABLE_DEBUG_LOG)
+                parser_depth--;
+                DEBUG_LOG(SPRTF("***Exiting ParseHTML, count: %d, depth %d\n", parser_count, parser_depth));
+#endif
+                return node;
+            } break;
 
-        TY_(UngetToken)( doc );
-
-        /* insert other content into noframes element */
-
-        if (frameset)
-        {
-            if (noframes == NULL)
+            case STATE_PARSE_HEAD_DONE:
             {
-                noframes = TY_(InferredTag)(doc, TidyTag_NOFRAMES);
-                TY_(InsertNodeAtEnd)(frameset, noframes);
-            }
-            else
+                head = node;
+                state = STATE_PRE_BODY;
+            } break;
+
+
+            /**************************************************************
+             In this case, we can finally parse a body.
+             **************************************************************/
+            case STATE_PARSE_BODY:
             {
-                TY_(Report)(doc, html, node, NOFRAMES_CONTENT);
-                if (noframes->type == StartEndTag)
-                    noframes->type = StartTag;
+                TidyParserMemory memory;
+                memory.identity = NULL; /* we don't need to reenter */
+                memory.mode = mode;
+                memory.original_node = html;
+                memory.reentry_node = NULL;
+                memory.reentry_mode = mode;
+                memory.reentry_state = STATE_COMPLETE;
+                TY_(InsertNodeAtEnd)(html, node);
+                pushMemory( doc, memory );
+#if defined(ENABLE_DEBUG_LOG)
+                parser_depth--;
+                DEBUG_LOG(SPRTF("***Exiting ParseHTML, count: %d, depth %d\n", parser_count, parser_depth));
+#endif
+                return node;
+            } break;
+
+
+            /**************************************************************
+             In this case, we will parse noframes. If necessary, the
+             node is already inserted in the proper spot.
+             **************************************************************/
+            case STATE_PARSE_NOFRAMES:
+            {
+                TidyParserMemory memory;
+                memory.identity = TY_(ParseHTML);
+                memory.mode = mode;
+                memory.original_node = html;
+                memory.reentry_node = frameset;
+                memory.reentry_mode = mode;
+                memory.reentry_state = STATE_PARSE_NOFRAMES_DONE;
+                pushMemory( doc, memory );
+#if defined(ENABLE_DEBUG_LOG)
+                parser_depth--;
+                DEBUG_LOG(SPRTF("***Exiting ParseHTML, count: %d, depth %d\n", parser_count, parser_depth));
+#endif
+                return noframes;
+            } break;
+
+            case STATE_PARSE_NOFRAMES_DONE:
+            {
+                frameset = node;
+                state = STATE_PRE_BODY;
+            } break;
+
+
+            /**************************************************************
+             In this case, we parse the frameset, and look for noframes
+             content to merge later if necessary.
+             **************************************************************/
+            case STATE_PARSE_FRAMESET:
+            {
+                TidyParserMemory memory;
+                memory.identity = TY_(ParseHTML);
+                memory.mode = mode;
+                memory.original_node = html;
+                memory.reentry_node = frameset;
+                memory.reentry_mode = mode;
+                memory.reentry_state = STATE_PARSE_FRAMESET_DONE;
+                TY_(InsertNodeAtEnd)(html, node);
+                pushMemory( doc, memory );
+#if defined(ENABLE_DEBUG_LOG)
+                parser_depth--;
+                DEBUG_LOG(SPRTF("***Exiting ParseHTML, count: %d, depth %d\n", parser_count, parser_depth));
+#endif
+                return node;
+            } break;
+
+            case STATE_PARSE_FRAMESET_DONE:
+            {
+                frameset = node;
+                /* See if it includes a noframes element so that
+                 we can merge subsequent noframes elements.
+                 */
+                for (node = frameset->content; node; node = node->next)
+                {
+                    if ( nodeIsNOFRAMES(node) )
+                        noframes = node;
+                }
+                state = STATE_PRE_BODY;
+            } break;
+
+
+            /**************************************************************
+             We really shouldn't get here, but if we do, finish nicely.
+             **************************************************************/
+            default:
+            {
+                state = STATE_COMPLETE;
             }
+        } /* switch */
+    } /* while */
 
-            TY_(ConstrainVersion)(doc, VERS_FRAMESET);
-            ParseTag(doc, noframes, mode);
-            continue;
-        }
+#if defined(ENABLE_DEBUG_LOG)
+    parser_depth--;
+    DEBUG_LOG(SPRTF("***Exiting ParseHTML, count: %d, depth %d\n", parser_count, parser_depth));
+#endif
+    return NULL;
+}
 
-        node = TY_(InferredTag)(doc, TidyTag_BODY);
-        /* Issue #132 - disable inserting BODY tag warning
-           BUT only if NOT --show-body-only yes */
-        if (!showingBodyOnly(doc))
-            TY_(Report)(doc, html, node, INSERTING_TAG );
-        TY_(ConstrainVersion)(doc, ~VERS_FRAMESET);
-        break;
+
+/** MARK: TY_(ParseBlock)
+ *  `element` is a node created by the lexer upon seeing the start tag, or
+ *  by the parser when the start tag is inferred. For now this forwards to TY_(oldParseBlock) and returns NULL; the body under `#if 0` is compiled out.
+ */
+Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode, Bool popStack )
+{
+    TY_(oldParseBlock)( doc, element, mode );
+    return NULL;
+    
+#if 0
+#if defined(ENABLE_DEBUG_LOG)
+    static int in_parse_block = 0;
+    static int parse_block_cnt = 0;
+#endif
+    Lexer* lexer = doc->lexer;
+    Node *node = NULL;
+    Bool checkstack = yes;
+    uint istackbase = 0;
+    Bool keepToken = no;
+    parserState state = STATE_INITIAL;
+
+#if defined(ENABLE_DEBUG_LOG)
+    in_parse_block++;
+    parse_block_cnt++;
+    SPRTF("Entering ParseBlock %d... %d %s\n",in_parse_block,parse_block_cnt,
+          ((element && element->element) ? element->element : ""));
+#endif
+
+    /*
+     If we're re-entering, then we need to setup from a previous state,
+     instead of starting fresh. We can pull what we need from the document's
+     stack.
+     */
+    if ( popStack )
+    {
+        TidyParserMemory memory = popMemory( doc );
+        node = memory.reentry_node;
+        mode = memory.reentry_mode;
+        state = memory.reentry_state;
+        element = memory.original_node;
+        keepToken = node != NULL; /* a saved node is processed before fetching a new token */
     }
 
-    /* node must be body */
 
-    TY_(InsertNodeAtEnd)(html, node);
-    ParseTag(doc, node, mode);
-    DEBUG_LOG(SPRTF("Exit ParseHTML 2...\n"));
+    if ( element->tag->model & CM_EMPTY ) {
+#if defined(ENABLE_DEBUG_LOG)
+        in_parse_block--;
+        SPRTF("Exit ParseBlockL 1 %d...\n",in_parse_block);
+#endif
+        return NULL;
+    }
+
+    if ( nodeIsFORM(element) && DescendantOf(element, TidyTag_FORM) )
+        TY_(Report)(doc, element, NULL, ILLEGAL_NESTING );
+
+    /*
+     InlineDup() asks the lexer to insert inline emphasis tags
+     currently pushed on the istack, but take care to avoid
+     propagating inline emphasis inside OBJECT or APPLET.
+     For these elements a fresh inline stack context is created
+     and disposed of upon reaching the end of the element.
+     They thus behave like table cells in this respect.
+     */
+    if (element->tag->model & CM_OBJECT)
+    {
+        istackbase = lexer->istackbase;
+        lexer->istackbase = lexer->istacksize;
+    }
+
+    if (!(element->tag->model & CM_MIXED))
+        TY_(InlineDup)( doc, NULL );
+
+    /*\
+     *  Issue #212 - If it is likely that it may be necessary
+     *  to move a leading space into a text node before this
+     *  element, then keep the mode MixedContent to keep any
+     *  leading space
+     \*/
+    if ( !(element->tag->model & CM_INLINE) || (element->tag->model & CM_FIELD ) )
+    {
+        mode = IgnoreWhitespace;
+    }
+    else if (mode == IgnoreWhitespace)
+    {
+        /* Issue #212 - Further fix in case ParseBlock() is called with 'IgnoreWhitespace'
+         when such a leading space may need to be inserted before this element to
+         preserve the browser view */
+        mode = MixedContent;
+    }
+
+    while ( state != STATE_COMPLETE )
+    {
+        if ( !keepToken )
+            node = TY_(GetToken)(doc, mode /*MixedContent*/);
+        keepToken = no;
+
+        if ( !node )
+            state = STATE_COMPLETE;
+
+        switch ( state )
+        {
+            case STATE_INITIAL:
+            {
+                /* end tag for this element */
+                if (node->type == EndTag && node->tag &&
+                    (node->tag == element->tag || element->was == node->tag))
+                {
+                    TY_(FreeNode)( doc, node );
+
+                    if (element->tag->model & CM_OBJECT)
+                    {
+                        /* pop inline stack */
+                        while (lexer->istacksize > lexer->istackbase)
+                            TY_(PopInline)( doc, NULL );
+                        lexer->istackbase = istackbase;
+                    }
+
+                    element->closed = yes;
+                    TrimSpaces( doc, element );
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
+#endif
+                    return NULL;
+                }
+
+                if ( nodeIsHTML(node) || nodeIsHEAD(node) || nodeIsBODY(node) )
+                {
+                    if ( TY_(nodeIsElement)(node) )
+                        TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                    TY_(FreeNode)( doc, node );
+                    continue;
+                }
+
+
+                if (node->type == EndTag)
+                {
+                    if (node->tag == NULL)
+                    {
+                        TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                        TY_(FreeNode)( doc, node );
+                        continue;
+                    }
+                    else if ( nodeIsBR(node) )
+                        node->type = StartTag;
+                    else if ( nodeIsP(node) )
+                    {
+                        /* Cannot have a block inside a paragraph, so no checking
+                         for an ancestor is necessary -- but we _can_ have
+                         paragraphs inside a block, so change it to an implicit
+                         empty paragraph, to be dealt with according to the user's
+                         options
+                         */
+                        node->type = StartEndTag;
+                        node->implicit = yes;
+                    }
+                    else if (DescendantOf( element, node->tag->id ))
+                    {
+                        /*
+                         if this is the end tag for an ancestor element
+                         then infer end tag for this element
+                         */
+                        TY_(UngetToken)( doc );
+                        state = STATE_COMPLETE;
+                        continue;
+                    }
+                    else
+                    {
+                        /* special case </tr> etc. for stuff moved in front of table */
+                        if ( lexer->exiled
+                            && (TY_(nodeHasCM)(node, CM_TABLE) || nodeIsTABLE(node)) )
+                        {
+                            TY_(UngetToken)( doc );
+                            TrimSpaces( doc, element );
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 2 %d...\n",in_parse_block);
+#endif
+                            return NULL;
+                        }
+                    }
+                }
+
+                /* mixed content model permits text */
+                if (TY_(nodeIsText)(node))
+                {
+                    if ( checkstack )
+                    {
+                        checkstack = no;
+                        if (!(element->tag->model & CM_MIXED))
+                        {
+                            if ( TY_(InlineDup)(doc, node) > 0 )
+                                continue;
+                        }
+                    }
+
+                    TY_(InsertNodeAtEnd)(element, node);
+                    mode = MixedContent;
+
+                    /*
+                     HTML4 strict doesn't allow mixed content for
+                     elements with %block; as their content model
+                     */
+                    /*
+                     But only body, map, blockquote, form and
+                     noscript have content model %block;
+                     */
+                    if ( nodeIsBODY(element)       ||
+                        nodeIsMAP(element)        ||
+                        nodeIsBLOCKQUOTE(element) ||
+                        nodeIsFORM(element)       ||
+                        nodeIsNOSCRIPT(element) )
+                        TY_(ConstrainVersion)( doc, ~VERS_HTML40_STRICT );
+                    continue;
+                }
+
+                if ( InsertMisc(element, node) )
+                    continue;
+
+                /* allow PARAM elements? */
+                if ( nodeIsPARAM(node) )
+                {
+                    if ( TY_(nodeHasCM)(element, CM_PARAM) && TY_(nodeIsElement)(node) )
+                    {
+                        TY_(InsertNodeAtEnd)(element, node);
+                        continue;
+                    }
+
+                    /* otherwise discard it */
+                    TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                    TY_(FreeNode)( doc, node );
+                    continue;
+                }
+
+                /* allow AREA elements? */
+                if ( nodeIsAREA(node) )
+                {
+                    if ( nodeIsMAP(element) && TY_(nodeIsElement)(node) )
+                    {
+                        TY_(InsertNodeAtEnd)(element, node);
+                        continue;
+                    }
+
+                    /* otherwise discard it */
+                    TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                    TY_(FreeNode)( doc, node );
+                    continue;
+                }
+
+                /* ignore unknown start/end tags */
+                if ( node->tag == NULL )
+                {
+                    TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                    TY_(FreeNode)( doc, node );
+                    continue;
+                }
+
+                /*
+                 Allow CM_INLINE elements here.
+
+                 Allow CM_BLOCK elements here unless
+                 lexer->excludeBlocks is yes.
+
+                 LI and DD are special cased.
+
+                 Otherwise infer end tag for this element.
+                 */
+
+                if ( !TY_(nodeHasCM)(node, CM_INLINE) )
+                {
+                    if ( !TY_(nodeIsElement)(node) )
+                    {
+                        if ( nodeIsFORM(node) )
+                            BadForm( doc );
+
+                        TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                        TY_(FreeNode)( doc, node );
+                        continue;
+                    }
+
+                    /* #427671 - Fix by Randy Waki - 10 Aug 00 */
+                    /*
+                     If an LI contains an illegal FRAME, FRAMESET, OPTGROUP, or OPTION
+                     start tag, discard the start tag and let the subsequent content get
+                     parsed as content of the enclosing LI.  This seems to mimic IE and
+                     Netscape, and avoids an infinite loop: without this check,
+                     ParseBlock (which is parsing the LI's content) and ParseList (which
+                     is parsing the LI's parent's content) repeatedly defer to each
+                     other to parse the illegal start tag, each time inferring a missing
+                     </li> or <li> respectively.
+
+                     NOTE: This check is a bit fragile.  It specifically checks for the
+                     four tags that happen to weave their way through the current series
+                     of tests performed by ParseBlock and ParseList to trigger the
+                     infinite loop.
+                     */
+                    if ( nodeIsLI(element) )
+                    {
+                        if ( nodeIsFRAME(node)    ||
+                            nodeIsFRAMESET(node) ||
+                            nodeIsOPTGROUP(node) ||
+                            nodeIsOPTION(node) )
+                        {
+                            TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                            TY_(FreeNode)( doc, node );  /* DSR - 27Apr02 avoid memory leak */
+                            continue;
+                        }
+                    }
+
+                    if ( nodeIsTD(element) || nodeIsTH(element) )
+                    {
+                        /* if parent is a table cell, avoid inferring the end of the cell */
+
+                        if ( TY_(nodeHasCM)(node, CM_HEAD) )
+                        {
+                            MoveToHead( doc, element, node );
+                            continue;
+                        }
+
+                        if ( TY_(nodeHasCM)(node, CM_LIST) )
+                        {
+                            TY_(UngetToken)( doc );
+                            node = TY_(InferredTag)(doc, TidyTag_UL);
+                            AddClassNoIndent(doc, node);
+                            lexer->excludeBlocks = yes;
+                        }
+                        else if ( TY_(nodeHasCM)(node, CM_DEFLIST) )
+                        {
+                            TY_(UngetToken)( doc );
+                            node = TY_(InferredTag)(doc, TidyTag_DL);
+                            lexer->excludeBlocks = yes;
+                        }
+
+                        /* infer end of current table cell */
+                        if ( !TY_(nodeHasCM)(node, CM_BLOCK) )
+                        {
+                            TY_(UngetToken)( doc );
+                            TrimSpaces( doc, element );
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 3 %d...\n",in_parse_block);
+#endif
+                            return NULL;
+                        }
+                    }
+                    else if ( TY_(nodeHasCM)(node, CM_BLOCK) )
+                    {
+                        if ( lexer->excludeBlocks )
+                        {
+                            if ( !TY_(nodeHasCM)(element, CM_OPT) )
+                                TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE );
+
+                            TY_(UngetToken)( doc );
+
+                            if ( TY_(nodeHasCM)(element, CM_OBJECT) )
+                                lexer->istackbase = istackbase;
+
+                            TrimSpaces( doc, element );
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 4 %d...\n",in_parse_block);
+#endif
+                            return NULL;
+                        }
+                    }
+                    else /* things like list items */
+                    {
+                        if (node->tag->model & CM_HEAD)
+                        {
+                            MoveToHead( doc, element, node );
+                            continue;
+                        }
+
+                        /*
+                         special case where a form start tag
+                         occurs in a tr and is followed by td or th
+                         */
+
+                        if ( nodeIsFORM(element) &&
+                            nodeIsTD(element->parent) &&
+                            element->parent->implicit )
+                        {
+                            if ( nodeIsTD(node) )
+                            {
+                                TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                                TY_(FreeNode)( doc, node );
+                                continue;
+                            }
+
+                            if ( nodeIsTH(node) )
+                            {
+                                TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                                TY_(FreeNode)( doc, node );
+                                node = element->parent;
+                                TidyDocFree(doc, node->element);
+                                node->element = TY_(tmbstrdup)(doc->allocator, "th");
+                                node->tag = TY_(LookupTagDef)( TidyTag_TH );
+                                continue;
+                            }
+                        }
+
+                        if ( !TY_(nodeHasCM)(element, CM_OPT) && !element->implicit )
+                            TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE );
+
+                        /* #521, warn on missing optional end-tags if not omitting them. */
+                        if ( cfgBool( doc, TidyOmitOptionalTags ) == no && TY_(nodeHasCM)(element, CM_OPT) )
+                            TY_(Report)(doc, element, node, MISSING_ENDTAG_OPTIONAL );
+
+
+                        TY_(UngetToken)( doc );
+
+                        if ( TY_(nodeHasCM)(node, CM_LIST) )
+                        {
+                            if ( element->parent && element->parent->tag &&
+                                element->parent->tag->parser == TY_(ParseList) )
+                            {
+                                TrimSpaces( doc, element );
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 5 %d...\n",in_parse_block);
+#endif
+                                return NULL;
+                            }
+
+                            node = TY_(InferredTag)(doc, TidyTag_UL);
+                            AddClassNoIndent(doc, node);
+                        }
+                        else if ( TY_(nodeHasCM)(node, CM_DEFLIST) )
+                        {
+                            if ( nodeIsDL(element->parent) )
+                            {
+                                TrimSpaces( doc, element );
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 6 %d...\n",in_parse_block);
+#endif
+                                return NULL;
+                            }
+
+                            node = TY_(InferredTag)(doc, TidyTag_DL);
+                        }
+                        else if ( TY_(nodeHasCM)(node, CM_TABLE) || TY_(nodeHasCM)(node, CM_ROW) )
+                        {
+                            /* http://tidy.sf.net/issue/1316307 */
+                            /* In exiled mode, return so table processing can
+                             continue. */
+                            if (lexer->exiled) {
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 7 %d...\n",in_parse_block);
+#endif
+                                return NULL;
+                            }
+                            node = TY_(InferredTag)(doc, TidyTag_TABLE);
+                        }
+                        else if ( TY_(nodeHasCM)(element, CM_OBJECT) )
+                        {
+                            /* pop inline stack */
+                            while ( lexer->istacksize > lexer->istackbase )
+                                TY_(PopInline)( doc, NULL );
+                            lexer->istackbase = istackbase;
+                            TrimSpaces( doc, element );
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 8 %d...\n",in_parse_block);
+#endif
+                            return NULL;
+
+                        }
+                        else
+                        {
+                            TrimSpaces( doc, element );
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 9 %d...\n",in_parse_block);
+#endif
+                            return NULL;
+                        }
+                    }
+                }
+
+                /*\
+                 *  Issue #307 - an <A> tag ends any open <A> element
+                 *  Like #427827 - fixed by Randy Waki and Bjoern Hoehrmann 23 Aug 00
+                 *  in ParseInline(), fix copied HERE to ParseBlock()
+                 *  href: http://www.w3.org/TR/html-markup/a.html
+                 *  The interactive element a must not appear as a descendant of the a element.
+                 \*/
+                if ( nodeIsA(node) && !node->implicit &&
+                    (nodeIsA(element) || DescendantOf(element, TidyTag_A)) )
+                {
+                    if (node->type != EndTag && node->attributes == NULL
+                        && cfgBool(doc, TidyCoerceEndTags) )
+                    {
+                        node->type = EndTag;
+                        TY_(Report)(doc, element, node, COERCE_TO_ENDTAG);
+                        TY_(UngetToken)( doc );
+                        continue;
+                    }
+
+                    if (nodeIsA(element))
+                    {
+                        TY_(Report)(doc, element, node, MISSING_ENDTAG_BEFORE);
+                        TY_(UngetToken)( doc );
+                    }
+                    else
+                    {
+                        /* Issue #597 - if we do not 'UngetToken' then it is being discarded.
+                         Add message, and 'FreeNode' - thanks @ralfjunker */
+                        TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED);
+                        TY_(FreeNode)(doc, node);
+                    }
+
+                    if (!(mode & Preformatted))
+                        TrimSpaces(doc, element);
+
+#if defined(ENABLE_DEBUG_LOG)
+in_parse_block--;
+SPRTF("Exit ParseBlock 9b %d...\n",in_parse_block);
+#endif
+                    return NULL;
+                }
+
+                /* parse known element */
+                if (TY_(nodeIsElement)(node))
+                {
+                    if (node->tag->model & CM_INLINE)
+                    {
+                        if (checkstack && !node->implicit)
+                        {
+                            checkstack = no;
+
+                            if (!(element->tag->model & CM_MIXED)) /* #431731 - fix by Randy Waki 25 Dec 00 */
+                            {
+                                if ( TY_(InlineDup)(doc, node) > 0 )
+                                    continue;
+                            }
+                        }
+
+                        mode = MixedContent;
+                    }
+                    else
+                    {
+                        checkstack = yes;
+                        mode = IgnoreWhitespace;
+                    }
+
+                    /* trim white space before <br> */
+                    if ( nodeIsBR(node) )
+                        TrimSpaces( doc, element );
+
+                    TY_(InsertNodeAtEnd)(element, node);
+
+                    if (node->implicit)
+                        TY_(Report)(doc, element, node, INSERTING_TAG );
+
+                    /* Issue #212 - WHY is this hard coded to 'IgnoreWhitespace' while an
+                     effort has been made above to set a 'MixedContent' mode in some cases?
+                     WHY IS THE 'mode' VARIABLE NOT USED HERE???? */
+//                    ParseTag( doc, node, IgnoreWhitespace /*MixedContent*/ );
+//                    continue;
+                    {
+                        TidyParserMemory memory;
+                        memory.identity = TY_(ParseBlock);
+                        memory.mode = IgnoreWhitespace; /* mode; */
+                        memory.original_node = element;
+                        memory.reentry_node = node;
+                        memory.reentry_mode = mode;
+                        memory.reentry_state = STATE_COMPLETE; /* NOTE(review): on re-entry this makes the token loop exit immediately -- confirm intended */
+                        pushMemory( doc, memory );
+                        return node; /* return the child node rather than calling ParseTag here */
+                    }
+                }
+
+                /* discard unexpected tags */
+                if (node->type == EndTag)
+                    TY_(PopInline)( doc, node );  /* if inline end tag */
+
+                TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
+                TY_(FreeNode)( doc, node );
+                continue;
+            } break; /* STATE_INITIAL */
+
+            default:
+            {
+                state = STATE_COMPLETE;
+            } break; /* default */
+
+        } /* switch */
+    } /* while */
+
+    if (!(element->tag->model & CM_OPT))
+        TY_(Report)(doc, element, node, MISSING_ENDTAG_FOR);
+
+    if (element->tag->model & CM_OBJECT)
+    {
+        /* pop inline stack */
+        while ( lexer->istacksize > lexer->istackbase )
+            TY_(PopInline)( doc, NULL );
+        lexer->istackbase = istackbase;
+    }
+
+    TrimSpaces( doc, element );
+
+#if defined(ENABLE_DEBUG_LOG)
+    in_parse_block--;
+    SPRTF("Exit ParseBlock 10 %d...\n",in_parse_block);
+#endif
+    return NULL;
+#endif
 }
 
-static Bool nodeCMIsOnlyInline( Node* node )
+
+/** MARK: TY_(ParseNamespace)
+ *  Act as a generic XML (sub)tree parser: collect each node and add it
+ *  to the DOM, without any further validation. It's useful for tags that
+ *  have XML-like content, such as `svg` and `math`.
+ *  @note Perhaps this is poorly named, as we're not parsing the namespace
+ *    of a particular tag, but a tag with XML-like content.
+ *  @note This is a non-recursive parser. It always returns NULL; `mode` is overridden with OtherNamespace and `popStack` is not used.
+ *  @todo Add schema- or other-hierarchy-definition-based validation
+ *    of the subtree here.
+ */
+Node* TY_(ParseNamespace)( TidyDocImpl* doc, Node *basenode, GetTokenMode mode, Bool popStack )
 {
-    return TY_(nodeHasCM)( node, CM_INLINE ) && !TY_(nodeHasCM)( node, CM_BLOCK );
+    Lexer* lexer = doc->lexer;
+    Node *node;
+    Node *parent = basenode;
+    uint istackbase;
+    AttVal* av; /* #130 MathML attr and entity fix! */
+
+    /* a la <table>: defer popping elements off the inline stack */
+    TY_(DeferDup)( doc );
+    istackbase = lexer->istackbase;
+    lexer->istackbase = lexer->istacksize;
+
+    mode = OtherNamespace; /* Preformatted; IgnoreWhitespace; */
+
+    while ((node = TY_(GetToken)(doc, mode)) != NULL)
+    {
+        /*
+        fix check to skip action in InsertMisc for regular/empty
+        nodes, which we don't want here...
+
+        The way we do it here is by checking and processing everything
+        and only what remains goes into InsertMisc()
+        */
+
+        /* is this a close tag? And does it match the current parent node? */
+        if (node->type == EndTag)
+        {
+            /*
+            to prevent end tags flowing from one 'alternate namespace' we
+            check this in two phases: first we check if the tag is a
+            descendant of the current node, and when it is, we check whether
+            it is the end tag for a node /within/ or /outside/ the basenode.
+            */
+            Bool outside;
+            Node *mp = FindMatchingDescendant(parent, node, basenode, &outside);
+
+            if (mp != NULL)
+            {
+                /*
+                when mp != parent as we might expect,
+                infer end tags until we 'hit' the matched
+                parent or the basenode
+                */
+                Node *n;
+
+                for (n = parent;
+                     n != NULL && n != basenode->parent && n != mp;
+                     n = n->parent)
+                {
+                    /* n->implicit = yes; */
+                    n->closed = yes;
+                    TY_(Report)(doc, n->parent, n, MISSING_ENDTAG_BEFORE);
+                }
+
+                /* Issue #369 - Since 'assert' is DEBUG only, and there are
+                   simple cases where these can be fired, removing them
+                   pending feedback from the original author!
+                   assert(outside == no ? n == mp : 1);
+                   assert(outside == yes ? n == basenode->parent : 1);
+                   =================================================== */
+
+                if (outside == no)
+                {
+                    /* EndTag for a node within the basenode subtree. Roll on... */
+                    n->closed = yes;
+                    TY_(FreeNode)(doc, node);
+
+                    node = n;
+                    parent = node->parent;
+                }
+                else
+                {
+                    /* EndTag for a node outside the basenode subtree: let the caller handle that. */
+                    TY_(UngetToken)( doc );
+                    node = basenode;
+                    parent = node->parent;
+                }
+
+                /* when we've arrived at the end-node for the base node, it's quitting time */
+                if (node == basenode)
+                {
+                    lexer->istackbase = istackbase;
+                    assert(basenode->closed == yes);
+                    return NULL;
+                }
+            }
+            else
+            {
+                /* unmatched close tag: report an error and discard */
+                /* TY_(Report)(doc, parent, node, NON_MATCHING_ENDTAG); Issue #308 - Seems wrong warning! */
+                TY_(Report)(doc, parent, node, DISCARDING_UNEXPECTED);
+                assert(parent);
+                /* assert(parent->tag != node->tag); Issue #308 - Seems would always be true! */
+                TY_(FreeNode)( doc, node); /* Issue #308 - Discard unexpected end tag memory */
+            }
+        }
+        else if (node->type == StartTag)
+        {
+            /* #130 MathML attr and entity fix!
+               care if it has attributes, and 'accidentally' any of those attributes match known */
+            for ( av = node->attributes; av; av = av->next )
+            {
+                av->dict = 0; /* does something need to be freed? */
+            }
+            /* add another child to the current parent */
+            TY_(InsertNodeAtEnd)(parent, node);
+            parent = node;
+        }
+        else
+        {
+            /* #130 MathML attr and entity fix!
+               care if it has attributes, and 'accidentally' any of those attributes match known */
+            for ( av = node->attributes; av; av = av->next )
+            {
+                av->dict = 0; /* does something need to be freed? */
+            }
+            TY_(InsertNodeAtEnd)(parent, node);
+        }
+    }
+
+    TY_(Report)(doc, basenode->parent, basenode, MISSING_ENDTAG_FOR); /* NOTE(review): lexer->istackbase is not restored on this end-of-input path -- confirm intended */
+    return NULL;
 }
 
+
+/** MARK: TY_(ParseInline)
+ *  Parse inline element nodes. Forwards to TY_(oldParseInline) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseInline)( TidyDocImpl *doc, Node *node, GetTokenMode mode, Bool popStack )
+{
+    TY_(oldParseInline)( doc, node, mode );
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseEmpty)
+ *  Parse empty element nodes. Only acts when lexer->isvoyager is set; always returns NULL and `popStack` is not used.
+ *  @note This is a non-recursive parser.
+  */
+Node* TY_(ParseEmpty)( TidyDocImpl* doc, Node *element, GetTokenMode mode, Bool popStack )
+{
+    Lexer* lexer = doc->lexer;
+    if ( lexer->isvoyager )
+    {
+        Node *node = TY_(GetToken)( doc, mode); /* look at the token following the empty element */
+        if ( node )
+        {
+            if ( !(node->type == EndTag && node->tag == element->tag) )
+            {
+                /* TY_(Report)(doc, element, node, ELEMENT_NOT_EMPTY); */
+                TY_(UngetToken)( doc ); /* not this element's end tag: push the token back */
+            }
+            else
+            {
+                TY_(FreeNode)( doc, node ); /* matching end tag: consume and discard it */
+            }
+        }
+    }
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseDefList)
+ *  Parses the `dl` tag. Forwards to TY_(oldParseDefList) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode, Bool popStack )
+{
+    TY_(oldParseDefList)( doc, list, mode );
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseList)
+ *  Parses list tags. Forwards to TY_(oldParseList) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseList)( doc, list, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseRow)
+ *  Parses a table row (presumably the `tr` tag -- confirm). Forwards to TY_(oldParseRow) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseRow)( doc, row, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseRowGroup)
+ *  Parses a table row group (presumably `thead`/`tbody`/`tfoot` -- confirm). Forwards to TY_(oldParseRowGroup) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseRowGroup)( doc, rowgroup, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseColGroup)
+ *  Parses the `colgroup` tag. Forwards to TY_(oldParseColGroup) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseColGroup)( doc, colgroup, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseTableTag)
+ *  Parses the `table` tag. Forwards to TY_(oldParseTableTag) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseTableTag)( doc, table, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParsePre)
+ *  Parses the `pre` tag. Forwards to TY_(oldParsePre) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParsePre)( doc, pre, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseOptGroup)
+ *  Parses the `optgroup` tag. Forwards to TY_(oldParseOptGroup) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseOptGroup)( doc, field, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseSelect)
+ *  Parses the `select` tag. Forwards to TY_(oldParseSelect) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseSelect)( doc, field, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseDatalist)
+ *  Parses the `datalist` tag. Forwards to TY_(oldParseDatalist) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseDatalist)( doc, field, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseText)
+ *  Parses the `option` and `textarea` tags. Forwards to TY_(oldParseText) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseText)( TidyDocImpl* doc, Node *field, GetTokenMode mode, Bool popStack )
+{
+    TY_(oldParseText)( doc, field, mode );
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseTitle)
+ *  Parses the `title` tag. Forwards to TY_(oldParseTitle) and always returns NULL; `popStack` is not used.
+ */
+Node* TY_(ParseTitle)( TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode), Bool popStack )
+{
+    TY_(oldParseTitle)( doc, title, mode ); /* NOTE(review): `mode` is marked ARG_UNUSED but is forwarded here */
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseScript)
+ *  Parses the `script` tag by handing `script` and `mode` to TY_(oldParseScript); `popStack` is ignored and NULL is always returned.
+ *
+ *  @todo This isn't quite right for CDATA content as it recognises tags
+ *  within the content and parses them accordingly. This will unfortunately
+ *  screw up scripts which include:
+ *    < + letter
+ *    < + !
+ *    < + ?
+ *    < + / + letter
+ */
+Node* TY_(ParseScript)( TidyDocImpl* doc, Node *script, GetTokenMode mode, Bool ARG_UNUSED(popStack) )
+{
+    TY_(oldParseScript)( doc, script, mode );
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseHead)
+ *  Parses the `head` tag by handing `head` and `mode` to TY_(oldParseHead); `popStack` is ignored and NULL is always returned.
+ */
+Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode mode, Bool ARG_UNUSED(popStack) )
+{
+    TY_(oldParseHead)( doc, head, mode );
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseBody)
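+ *  Parses the `body` tag. For now this only delegates to TY_(oldParseBody); the state-machine draft after the early return is disabled with `#if 0`.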
+ */
+Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode, Bool popStack )
+{
+    TY_(oldParseBody)( doc, body, mode);
+    return NULL;
+#if 0
+    Lexer* lexer = doc->lexer;
+    Node *node = NULL;
+    Bool checkstack, iswhitenode;
+    Bool keepToken = no;
+    parserState state = STATE_INITIAL;
+
+    mode = IgnoreWhitespace;
+    checkstack = yes;
+
+    /*
+     If we're re-entering, then we need to setup from a previous state,
+     instead of starting fresh. We can pull what we need from the document's
+     stack.
+     */
+    if ( popStack )
+    {
+        TidyParserMemory memory = popMemory( doc );
+        node = memory.reentry_node;
+        mode = memory.reentry_mode;
+        state = memory.reentry_state;
+        body = memory.original_node;
+        keepToken = node != NULL;
+    }
+
+    TY_(BumpObject)( doc, body->parent );
+
+    DEBUG_LOG(SPRTF("Enter ParseBody...\n"));
+    while ( state != STATE_COMPLETE )
+    {
+        if ( !keepToken )
+            node = TY_(GetToken)( doc, mode );
+        keepToken = no;
+
+        if ( !node )
+            state = STATE_COMPLETE;
+
+        switch ( state )
+        {
+            case STATE_INITIAL:
+            {
+                /* find and discard multiple <body> elements */
+                if (node->tag == body->tag && node->type == StartTag)
+                {
+                    TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
+                    TY_(FreeNode)(doc, node);
+                    continue;
+                }
+
+                /* #538536 Extra endtags not detected */
+                if ( nodeIsHTML(node) )
+                {
+                    if (TY_(nodeIsElement)(node) || lexer->seenEndHtml)
+                        TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
+                    else
+                        lexer->seenEndHtml = 1;
+
+                    TY_(FreeNode)( doc, node);
+                    continue;
+                }
+
+                if ( lexer->seenEndBody &&
+                    ( node->type == StartTag ||
+                     node->type == EndTag   ||
+                     node->type == StartEndTag ) )
+                {
+                    TY_(Report)(doc, body, node, CONTENT_AFTER_BODY );
+                }
+
+                if ( node->tag == body->tag && node->type == EndTag )
+                {
+                    body->closed = yes;
+                    TrimSpaces(doc, body);
+                    TY_(FreeNode)( doc, node);
+                    lexer->seenEndBody = 1;
+                    mode = IgnoreWhitespace;
+
+                    if ( nodeIsNOFRAMES(body->parent) )
+                    {
+                        keepToken = yes;
+                        state = STATE_COMPLETE;
+                        continue;
+                    }
+
+                    continue;
+                }
+
+                if ( nodeIsNOFRAMES(node) )
+                {
+                    if (node->type == StartTag)
+                    {
+                        TY_(InsertNodeAtEnd)(body, node);
+                        TY_(ParseBlock)(doc, node, mode, no);
+                        continue;
+                    }
+
+                    if (node->type == EndTag && nodeIsNOFRAMES(body->parent) )
+                    {
+                        TrimSpaces(doc, body);
+                        TY_(UngetToken)( doc );
+                        keepToken = yes;
+                        state = STATE_COMPLETE;
+                        continue;
+                    }
+                }
+
+                if ( (nodeIsFRAME(node) || nodeIsFRAMESET(node))
+                    && nodeIsNOFRAMES(body->parent) )
+                {
+                    TrimSpaces(doc, body);
+                    TY_(UngetToken)( doc );
+                    keepToken = yes;
+                    state = STATE_COMPLETE;
+                    continue;
+                }
+
+                iswhitenode = no;
+
+                if ( TY_(nodeIsText)(node) &&
+                    node->end <= node->start + 1 &&
+                    lexer->lexbuf[node->start] == ' ' )
+                    iswhitenode = yes;
+
+                /* deal with comments etc. */
+                if (InsertMisc(body, node))
+                    continue;
+
+                /* mixed content model permits text */
+                if (TY_(nodeIsText)(node))
+                {
+                    if (iswhitenode && mode == IgnoreWhitespace)
+                    {
+                        TY_(FreeNode)( doc, node);
+                        continue;
+                    }
+
+                    /* HTML 2 and HTML4 strict don't allow text here */
+                    TY_(ConstrainVersion)(doc, ~(VERS_HTML40_STRICT | VERS_HTML20));
+
+                    if (checkstack)
+                    {
+                        checkstack = no;
+
+                        if ( TY_(InlineDup)(doc, node) > 0 )
+                            continue;
+                    }
+
+                    TY_(InsertNodeAtEnd)(body, node);
+                    mode = MixedContent;
+                    continue;
+                }
+
+                if (node->type == DocTypeTag)
+                {
+                    InsertDocType(doc, body, node);
+                    continue;
+                }
+                /* discard unknown  and PARAM tags */
+                if ( node->tag == NULL || nodeIsPARAM(node) )
+                {
+                    TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
+                    TY_(FreeNode)( doc, node);
+                    continue;
+                }
+
+                /*
+                 Netscape allows LI and DD directly in BODY
+                 We infer UL or DL respectively and use this
+                 Bool to exclude block-level elements so as
+                 to match Netscape's observed behaviour.
+                 */
+                lexer->excludeBlocks = no;
+
+                if (( nodeIsINPUT(node) ||
+                     (!TY_(nodeHasCM)(node, CM_BLOCK) && !TY_(nodeHasCM)(node, CM_INLINE))
+                     ) && !TY_(IsHTML5Mode)(doc) )
+                {
+                    /* avoid this error message being issued twice */
+                    if (!(node->tag->model & CM_HEAD))
+                        TY_(Report)(doc, body, node, TAG_NOT_ALLOWED_IN);
+
+                    if (node->tag->model & CM_HTML)
+                    {
+                        /* copy body attributes if current body was inferred */
+                        if ( nodeIsBODY(node) && body->implicit
+                            && body->attributes == NULL )
+                        {
+                            body->attributes = node->attributes;
+                            node->attributes = NULL;
+                        }
+
+                        TY_(FreeNode)( doc, node);
+                        continue;
+                    }
+
+                    if (node->tag->model & CM_HEAD)
+                    {
+                        MoveToHead(doc, body, node);
+                        continue;
+                    }
+
+                    if (node->tag->model & CM_LIST)
+                    {
+                        TY_(UngetToken)( doc );
+                        node = TY_(InferredTag)(doc, TidyTag_UL);
+                        AddClassNoIndent(doc, node);
+                        lexer->excludeBlocks = yes;
+                    }
+                    else if (node->tag->model & CM_DEFLIST)
+                    {
+                        TY_(UngetToken)( doc );
+                        node = TY_(InferredTag)(doc, TidyTag_DL);
+                        lexer->excludeBlocks = yes;
+                    }
+                    else if (node->tag->model & (CM_TABLE | CM_ROWGRP | CM_ROW))
+                    {
+                        /* http://tidy.sf.net/issue/2855621 */
+                        if (node->type != EndTag) {
+                            TY_(UngetToken)( doc );
+                            node = TY_(InferredTag)(doc, TidyTag_TABLE);
+                        }
+                        lexer->excludeBlocks = yes;
+                    }
+                    else if ( nodeIsINPUT(node) )
+                    {
+                        TY_(UngetToken)( doc );
+                        node = TY_(InferredTag)(doc, TidyTag_FORM);
+                        lexer->excludeBlocks = yes;
+                    }
+                    else
+                    {
+                        if ( !TY_(nodeHasCM)(node, CM_ROW | CM_FIELD) )
+                        {
+                            TY_(UngetToken)( doc );
+                            return NULL;
+                        }
+
+                        /* ignore </td> </th> <option> etc. */
+                        TY_(FreeNode)( doc, node );
+                        continue;
+                    }
+                }
+
+                if (node->type == EndTag)
+                {
+                    if ( nodeIsBR(node) )
+                        node->type = StartTag;
+                    else if ( nodeIsP(node) )
+                    {
+                        node->type = StartEndTag;
+                        node->implicit = yes;
+                    }
+                    else if ( TY_(nodeHasCM)(node, CM_INLINE) )
+                        TY_(PopInline)( doc, node );
+                }
+
+                if (TY_(nodeIsElement)(node))
+                {
+                    if (nodeIsMAIN(node))
+                    {
+                        /*\ Issue #166 - repeated <main> element
+                         *  How to efficiently search for a previous main element?
+                         \*/
+                        if ( findNodeById(doc, TidyTag_MAIN) )
+                        {
+                            doc->badForm |= flg_BadMain; /* this is an ERROR in format */
+                            TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
+                            TY_(FreeNode)( doc, node);
+                            continue;
+                        }
+                    }
+                    /* Issue #20 - merging from Ger Hobbelt fork put back CM_MIXED, which had been
+                     removed to fix this issue - reverting to fix 880221e
+                     */
+                    if ( TY_(nodeHasCM)(node, CM_INLINE) )
+                    {
+                        /* HTML4 strict doesn't allow inline content here */
+                        /* but HTML2 does allow img elements as children of body */
+                        if ( nodeIsIMG(node) )
+                            TY_(ConstrainVersion)(doc, ~VERS_HTML40_STRICT);
+                        else
+                            TY_(ConstrainVersion)(doc, ~(VERS_HTML40_STRICT|VERS_HTML20));
+
+                        if (checkstack && !node->implicit)
+                        {
+                            checkstack = no;
+
+                            if ( TY_(InlineDup)(doc, node) > 0 )
+                                continue;
+                        }
+
+                        mode = MixedContent;
+                    }
+                    else
+                    {
+                        checkstack = yes;
+                        mode = IgnoreWhitespace;
+                    }
+
+                    if (node->implicit)
+                        TY_(Report)(doc, body, node, INSERTING_TAG);
+
+                    TY_(InsertNodeAtEnd)(body, node);
+//                    ParseTag(doc, node, mode);
+//                    continue;
+                    state = STATE_PARSE_TAG;
+                    keepToken = yes;
+                    continue;
+                }
+
+                /* discard unexpected tags */
+                TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
+                TY_(FreeNode)( doc, node);
+            } break;  /* STATE_INITIAL */
+
+
+            case STATE_PARSE_TAG:
+            {
+                TidyParserMemory memory;
+                memory.identity = TY_(ParseBody);
+                memory.mode = mode;
+                memory.original_node = body;
+                memory.reentry_node = NULL;
+                memory.reentry_mode = mode;
+                memory.reentry_state = STATE_INITIAL;
+                pushMemory( doc, memory );
+                return node;
+            } break;
+
+            default:
+            {
+                state = STATE_COMPLETE;
+            } break;
+
+        } /* switch */
+    } /* while */
+    DEBUG_LOG(SPRTF("Exit ParseBody 1...\n"));
+    return NULL;
+#endif
+}
+
+
+/** MARK: TY_(ParseNoFrames)
+ *  Parses the `noframes` tag by handing `noframes` and `mode` to TY_(oldParseNoFrames); `popStack` is ignored and NULL is always returned.
+ */
+Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode, Bool ARG_UNUSED(popStack) )
+{
+    TY_(oldParseNoFrames)( doc, noframes, mode );
+    return NULL;
+}
+
+
+/** MARK: TY_(ParseFrameSet)
+ *  Parses the `frameset` tag by handing `frameset` and `mode` to TY_(oldParseFrameSet); `popStack` is ignored and NULL is always returned.
+ */
+Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode mode, Bool ARG_UNUSED(popStack) )
+{
+    TY_(oldParseFrameSet)( doc, frameset, mode );
+    return NULL;
+}
+
+
+
+/***************************************************************************//*
+ ** MARK: - Post-Parse Operations
+ ***************************************************************************/
+
+
+/**
+ *  Encloses all naked body text within `p` tags.
+ */
 static void EncloseBodyText(TidyDocImpl* doc)
 {
     Node* node;
@@ -4502,9 +6037,13 @@ static void EncloseBodyText(TidyDocImpl* doc)
     }
 }
 
-/* <form>, <blockquote> and <noscript> do not allow #PCDATA in
-   HTML 4.01 Strict (%block; model instead of %flow;).
-  When requested, text nodes in these elements are wrapped in <p>. */
+
+/**
+ *  Encloses naked text in certain elements within `p` tags.
+ *
+ *  <form>, <blockquote>, and <noscript> do not allow #PCDATA in
+ *  HTML 4.01 Strict (%block; model instead of %flow;).
+ */
 static void EncloseBlockText(TidyDocImpl* doc, Node* node)
 {
     Node *next;
@@ -4548,6 +6087,10 @@ static void EncloseBlockText(TidyDocImpl* doc, Node* node)
     }
 }
 
+
+/**
+ *  Replaces elements that are obsolete with appropriate substitute tags.
+ */
 static void ReplaceObsoleteElements(TidyDocImpl* doc, Node* node)
 {
     Node *next;
@@ -4572,6 +6115,10 @@ static void ReplaceObsoleteElements(TidyDocImpl* doc, Node* node)
     }
 }
 
+
+/**
+ *  Performs checking of all attributes recursively starting at `node`.
+ */
 static void AttributeChecks(TidyDocImpl* doc, Node* node)
 {
     Node *next;
@@ -4596,9 +6143,315 @@ static void AttributeChecks(TidyDocImpl* doc, Node* node)
     }
 }
 
-/*
-  HTML is the top level element
-*/
+
+/***************************************************************************//*
+ ** MARK: - Internal API Implementation
+ ***************************************************************************/
+
+
+/** MARK: TY_(CheckNodeIntegrity)
+ *  Verifies that the sibling, parent, and child links of `node` and of every
+ *  node beneath it are mutually consistent.
+ *  @note Defining the macro NO_NODE_INTEGRITY_CHECK compiles the checks out,
+ *  in which case the function unconditionally returns `yes`.
+ */
+Bool TY_(CheckNodeIntegrity)(Node *node)
+{
+#ifndef NO_NODE_INTEGRITY_CHECK
+    Node *kid;
+
+    /* The previous sibling must point forward to this node. */
+    if (node->prev && node->prev->next != node)
+        return no;
+
+    /* The next sibling must not be this node and must point back to it. */
+    if (node->next && (node->next == node || node->next->prev != node))
+        return no;
+
+    if (node->parent)
+    {
+        /* A node without a previous sibling must be the parent's first child. */
+        if (!node->prev && node->parent->content != node)
+            return no;
+
+        /* A node without a next sibling must be the parent's last child. */
+        if (!node->next && node->parent->last != node)
+            return no;
+    }
+
+    /* Every child must name this node as parent and be sound itself. */
+    for (kid = node->content; kid != NULL; kid = kid->next)
+    {
+        if (kid->parent != node || !TY_(CheckNodeIntegrity)(kid))
+            return no;
+    }
+#endif
+    return yes;
+}
+
+
+/** MARK: TY_(IsNewNode)
+ *  Reports whether a node's tag carries CM_NEW, which determines how
+ *  attributes without values should be printed (introduced for user-defined
+ *  tags, e.g. ColdFusion). A NULL node or a node without a tag counts as new.
+ */
+Bool TY_(IsNewNode)(Node *node)
+{
+    /* A missing node or tag is treated as new. */
+    if (!node || !node->tag)
+        return yes;
+    /* Collapse the masked bit to a proper Bool instead of leaking the raw flag. */
+    return (node->tag->model & CM_NEW) ? yes : no;
+}
+
+
+/** MARK: TY_(CoerceNode)
+ *  Transforms a given node to another element, for example, from a <p>
+ *  to a <br>, reporting the change as obsolete, unexpected, or a plain replacement.
+ */
+void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool unexpected)
+{
+    const Dict* tag = TY_(LookupTagDef)(tid); /* NOTE(review): dereferenced below without a NULL check */
+    Node* tmp = TY_(InferredTag)(doc, tag->id); /* throwaway node, used only as the report argument */
+
+    if (obsolete) /* `obsolete` takes precedence over `unexpected` */
+        TY_(Report)(doc, node, tmp, OBSOLETE_ELEMENT);
+    else if (unexpected)
+        TY_(Report)(doc, node, tmp, REPLACING_UNEX_ELEMENT);
+    else
+        TY_(Report)(doc, node, tmp, REPLACING_ELEMENT);
+
+    TidyDocFree(doc, tmp->element); /* release the throwaway node's name first... */
+    TidyDocFree(doc, tmp);          /* ...then the node itself */
+
+    node->was = node->tag; /* remember the tag the node had before coercion */
+    node->tag = tag;
+    node->type = StartTag;
+    node->implicit = yes;
+    TidyDocFree(doc, node->element); /* drop the old element name before replacing it */
+    node->element = TY_(tmbstrdup)(doc->allocator, tag->name);
+}
+
+
+/** MARK: TY_(RemoveNode)
+ *  Unlinks a node (its children stay attached to it) from its siblings and parent, then returns it.
+ */
+Node *TY_(RemoveNode)(Node *node)
+{
+    if (node->prev) /* bridge the previous sibling over this node */
+        node->prev->next = node->next;
+
+    if (node->next) /* bridge the next sibling back over this node */
+        node->next->prev = node->prev;
+
+    if (node->parent)
+    {
+        if (node->parent->content == node) /* node was the first child */
+            node->parent->content = node->next;
+
+        if (node->parent->last == node) /* node was the last child */
+            node->parent->last = node->prev;
+    }
+
+    node->parent = node->prev = node->next = NULL; /* fully detach; node->content is left alone */
+    return node;
+}
+
+
+/** MARK: TY_(DiscardElement)
+ *  Unlinks `element` from the tree and frees it; returns its former next sibling (NULL for a NULL element).
+ */
+Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element )
+{
+    Node *following;
+
+    if (element == NULL)
+        return NULL;
+
+    /* Capture the sibling first: TY_(RemoveNode) clears element->next. */
+    following = element->next;
+    TY_(RemoveNode)(element);
+    TY_(FreeNode)( doc, element);
+    return following;
+}
+
+
+/** MARK: TY_(InsertNodeAtStart)
+ *  Insert node into markup tree as the first element of content of element.
+ */
+void TY_(InsertNodeAtStart)(Node *element, Node *node)
+{
+    node->parent = element;
+
+    if (element->content == NULL) /* no children yet: node is also the last child */
+        element->last = node;
+    else                          /* otherwise the old first child gains a predecessor */
+        element->content->prev = node;
+
+    node->next = element->content; /* old first child (or NULL) follows node */
+    node->prev = NULL;
+    element->content = node;
+}
+
+
+/** MARK: TY_(InsertNodeAtEnd)
+ *  Insert node into markup tree as the last element of content of element.
+ */
+void TY_(InsertNodeAtEnd)(Node *element, Node *node)
+{
+    node->parent = element;
+    node->prev = element->last; /* NOTE(review): node->next is not written here, only parent and prev */
+
+    if (element->last != NULL) /* append after the current last child */
+        element->last->next = node;
+    else                       /* no children yet: node is also the first child */
+        element->content = node;
+
+    element->last = node;
+}
+
+
+/** MARK: TY_(InsertNodeBeforeElement)
+ *  Insert node into markup tree before element, as a child of element's parent.
+ */
+void TY_(InsertNodeBeforeElement)(Node *element, Node *node)
+{
+    Node *parent;
+
+    parent = element->parent;
+    node->parent = parent;
+    node->next = element;
+    node->prev = element->prev; /* take over element's former predecessor */
+    element->prev = node;
+
+    if (node->prev) /* make the former predecessor point forward to node */
+        node->prev->next = node;
+
+    if (parent->content == element) /* NOTE(review): parent is dereferenced without a NULL check */
+        parent->content = node;
+}
+
+
+/** MARK: TY_(InsertNodeAfterElement)
+ *  Insert node into markup tree after element, as a child of element's parent (which may be NULL).
+ */
+void TY_(InsertNodeAfterElement)(Node *element, Node *node)
+{
+    Node *parent;
+
+    parent = element->parent;
+    node->parent = parent;
+
+    /* AQ - 13 Jan 2000 fix for parent == NULL */
+    if (parent != NULL && parent->last == element)
+        parent->last = node; /* NOTE(review): node->next is left untouched on this branch */
+    else
+    {
+        node->next = element->next; /* node takes over element's former successor */
+        /* AQ - 13 Jan 2000 fix for node->next == NULL */
+        if (node->next != NULL)
+            node->next->prev = node;
+    }
+
+    element->next = node;
+    node->prev = element;
+}
+
+
+/** MARK: TY_(TrimEmptyElement)
+ *  Discards `element` when CanPrune allows it and returns the node that followed it; otherwise just returns element->next.
+ */
+Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element )
+{
+    /* Elements that may not be pruned are simply stepped over. */
+    if ( !CanPrune(doc, element) )
+        return element->next;
+
+    /* Text nodes are dropped silently; anything else is reported. */
+    if (element->type != TextNode)
+    {
+        doc->footnotes |= FN_TRIM_EMPTY_ELEMENT;
+        TY_(Report)(doc, element, NULL, TRIM_EMPTY_ELEMENT);
+    }
+    return TY_(DiscardElement)(doc, element);
+}
+
+
+/** MARK: TY_(DropEmptyElements)
+ *  Walks `node` and its following siblings depth-first, offering elements and zero-length text nodes to TY_(TrimEmptyElement).
+ */
+Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node)
+{
+    while (node)
+    {
+        Node* sibling = node->next;
+
+        /* Descend first: children are handled before the node itself. */
+        if (node->content)
+            TY_(DropEmptyElements)(doc, node->content);
+
+        /* Only elements and zero-length text nodes are trim candidates. */
+        if (TY_(nodeIsElement)(node) ||
+            (TY_(nodeIsText)(node) && node->start >= node->end))
+        {
+            node = TY_(TrimEmptyElement)(doc, node);
+        }
+        else
+        {
+            node = sibling;
+        }
+    }
+    /* The loop only exits once node is NULL, so NULL is what gets returned. */
+    return node;
+}
+
+
+/** MARK: TY_(IsBlank)
+ *  Indicates whether or not a text node is blank, meaning that it consists
+ *  of nothing, or a single space. Non-text nodes are never blank.
+ */
+Bool TY_(IsBlank)(Lexer *lexer, Node *node)
+{
+    Bool isBlank = TY_(nodeIsText)(node);
+    if ( isBlank )
+        isBlank = ( node->end == node->start ||       /* Zero length */
+                   ( node->end == node->start+1      /* or one blank. */
+                    && lexer->lexbuf[node->start] == ' ' ) );
+    return isBlank;
+}
+
+
+/** MARK: TY_(IsJavaScript)
+ *  Indicates whether a node counts as javascript: either it has no attributes
+ *  at all, or a `language`/`type` attribute satisfies AttrContains "javascript".
+ */
+Bool TY_(IsJavaScript)(Node *node)
+{
+    AttVal *av;
+
+    /* With no attributes at all the node is taken to be javascript. */
+    if (node->attributes == NULL)
+        return yes;
+
+    /* Otherwise a `language` or `type` attribute must mention javascript. */
+    for (av = node->attributes; av != NULL; av = av->next)
+    {
+        if ( !attrIsLANGUAGE(av) && !attrIsTYPE(av) )
+            continue;
+
+        if ( AttrContains(av, "javascript") )
+            return yes;
+    }
+
+    return no;
+}
+
+
+/** MARK: TY_(ParseDocument)
+ *  Parses an HTML document after lexing. It begins by properly configuring
+ *  the overall HTML structure, and subsequently processes all remaining
+ *  nodes.
+ */
 void TY_(ParseDocument)(TidyDocImpl* doc)
 {
     Node *node, *html, *doctype = NULL;
@@ -4649,9 +6502,7 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
 
         if (node->type == StartTag && nodeIsHTML(node))
         {
-            AttVal *xmlns;
-
-            xmlns = TY_(AttrGetById)(node, TidyAttr_XMLNS);
+            AttVal *xmlns = TY_(AttrGetById)(node, TidyAttr_XMLNS);
 
             if (AttrValueIs(xmlns, XHTML_NAMESPACE))
             {
@@ -4678,11 +6529,11 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
             html = node;
 
         /*\
-         *  #72, avoid MISSING_DOCTYPE if show-body-only. 
+         *  #72, avoid MISSING_DOCTYPE if show-body-only.
          *  #191, also if --doctype omit, that is TidyDoctypeOmit
          *  #342, adjust tags to html4-- if not 'auto' or 'html5'
         \*/
-        if (!TY_(FindDocType)(doc)) 
+        if (!TY_(FindDocType)(doc))
         {
             ulong dtmode = cfg( doc, TidyDoctypeMode );
             if ((dtmode != TidyDoctypeOmit) && !showingBodyOnly(doc))
@@ -4697,7 +6548,7 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
             }
         }
         TY_(InsertNodeAtEnd)( &doc->root, html);
-        TY_(ParseHTML)( doc, html, IgnoreWhitespace );
+        ParseHTMLWithNode( doc, html );
         break;
     }
 
@@ -4710,7 +6561,7 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
         /* a later check should complain if <body> is empty */
         html = TY_(InferredTag)(doc, TidyTag_HTML);
         TY_(InsertNodeAtEnd)( &doc->root, html);
-        TY_(ParseHTML)(doc, html, IgnoreWhitespace);
+        ParseHTMLWithNode( doc, html );
     }
 
     node = TY_(FindTITLE)(doc);
@@ -4744,6 +6595,11 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
         EncloseBlockText(doc, &doc->root);
 }
 
+
+/** MARK: TY_(XMLPreserveWhiteSpace)
+ *  Indicates whether or not whitespace is to be preserved in XHTML/XML
+ *  documents.
+ */
 Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element)
 {
     AttVal *attribute;
@@ -4777,9 +6633,10 @@ Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element)
     return no;
 }
 
-/*
-  XML documents
-*/
+
+/** MARK: ParseXMLElement
+ *  Parses the given XML element.
+ */
 static void ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
 {
     Lexer* lexer = doc->lexer;
@@ -4857,6 +6714,10 @@ static void ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
     }
 }
 
+
+/** MARK: TY_(ParseXMLDocument)
+ *  Parses the document using Tidy's XML parser.
+ */
 void TY_(ParseXMLDocument)(TidyDocImpl* doc)
 {
     Node *node, *doctype = NULL;
@@ -4918,6 +6779,7 @@ void TY_(ParseXMLDocument)(TidyDocImpl* doc)
 }
 
 
+
 /*
  * local variables:
  * mode: c
diff --git a/src/parser.h b/src/parser.h
index 5d62922..7d6c522 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -41,6 +41,74 @@
  ******************************************************************************/
 
 
+/**
+ *  The parsers keep track of their states with the states defined here, and
+ *  use these symbols when pushing to the stack so that they can later recreate
+ *  their environments when re-entered.
+ */
+typedef enum {
+    /* Universal states. */
+    STATE_INITIAL,             /**< This is the initial state for every parser. */
+    STATE_COMPLETE,            /**< Complete! A parser's main loop stops in this state. */
+    STATE_PARSE_TAG,           /**< NOTE(review): appears to mean "save context and hand a node back for parsing" - confirm. */
+    STATE_PARSE_TAG_DONE,      /**< NOTE(review): presumably the resume point after STATE_PARSE_TAG - confirm. */
+    /* ParseHTML states. */
+    STATE_PRE_HEAD,            /**< In this state, we've not detected head yet. */
+    STATE_PRE_BODY,            /**< In this state, we'll consider frames vs. body. */
+    STATE_PARSE_BODY,          /**< In this state, we can parse the body. */
+    STATE_PARSE_HEAD,          /**< In this state, we will setup head for parsing. */
+    STATE_PARSE_HEAD_DONE,     /**< Resume here after parsing head. */
+    STATE_PARSE_NOFRAMES,      /**< In this state, we can parse noframes content. */
+    STATE_PARSE_NOFRAMES_DONE, /**< In this state, we can restore more state. */
+    STATE_PARSE_FRAMESET,      /**< In this state, we will parse frameset content. */
+    STATE_PARSE_FRAMESET_DONE, /**< We need to cleanup some things after parsing frameset. */
+} parserState;
+
+
+/**
+ *  This typedef represents the state of a parser when it enters and exits.
+ *  When the parser needs to finish work on the way back up the stack, it will
+ *  push one of these records to the stack, and it will pop a record from the
+ *  stack upon re-entry.
+ */
+typedef struct _TidyParserMemory
+{
+    Parser       *identity;      /**< Which parser pushed this record? */
+    Node         *original_node; /**< Originally provided node at entry. */
+    Node         *reentry_node;  /**< A node a parser might want to save. */
+    GetTokenMode reentry_mode;   /**< The mode to use for the next node. */
+    parserState  reentry_state;  /**< State to set during re-entry. */
+    GetTokenMode mode;           /**< The caller will peek at this value to get the correct mode. */
+} TidyParserMemory;
+
+
+/**
+ *  This typedef represents a stack of TidyParserMemory records. The Tidy
+ *  document has its own instance of this.
+ */
+typedef struct _TidyParserStack
+{
+    TidyParserMemory* content;    /**< The stack's TidyParserMemory records. */
+    TidyAllocator* allocator;     /**< The allocator used for creating. */
+    uint size;                    /**< Current size of the stack. */
+    int top;                      /**< Top of the stack. */
+} TidyParserStack;
+
+
+/**
+ *  Allocates and initializes the parser's stack. tidyDocCreate will perform
+ *  this automatically.
+ */
+TY_PRIVATE void TY_(InitParserStack)( TidyDocImpl* doc );
+
+
+/**
+ *  Frees the parser's stack when done. tidyDocRelease will perform this
+ *  automatically.
+ */
+TY_PRIVATE void TY_(FreeParserStack)( TidyDocImpl* doc );
+
+
 /**
  *  Is used to perform a node integrity check recursively after parsing
  *  an HTML or XML document.
@@ -96,7 +164,7 @@ TY_PRIVATE Node *TY_(RemoveNode)(Node *node);
 
 /**
  *  Remove node from markup tree and discard it.
- *  @param doc The Tidy document from which to discarb the node.
+ *  @param doc The Tidy document from which to discard the node.
  *  @param element The node to discard.
  *  @returns Returns the next node.
  */
@@ -202,4 +270,3 @@ TY_PRIVATE void TY_(ParseXMLDocument)( TidyDocImpl* doc );
 /** @} end internal_api group */
 
 #endif /* __PARSER_H__ */
-
diff --git a/src/tags.c b/src/tags.c
index 095c20c..8f467b4 100644
--- a/src/tags.c
+++ b/src/tags.c
@@ -168,7 +168,7 @@ static CheckAttribs CheckHTML;
 \*/
 static Dict tag_defs[] =
 {
-  { TidyTag_UNKNOWN,    "unknown!",   VERS_UNKNOWN,         NULL,                       (0),                                           NULL,          NULL           },
+  { TidyTag_UNKNOWN,    "unknown!",   VERS_UNKNOWN,         NULL,                            (0),                                           NULL,               NULL           },
 
   /* W3C defined elements */
   { TidyTag_A,          "a",          VERS_ELEM_A,          &TY_(W3CAttrsFor_A)[0],          (CM_INLINE|CM_BLOCK|CM_MIXED),                 TY_(ParseBlock),    NULL           }, /* Issue #167 & #169 - default HTML5 */
@@ -332,7 +332,7 @@ static Dict tag_defs[] =
   { TidyTag_WBR,         "wbr",          VERS_ELEM_WBR,         &TY_(W3CAttrsFor_WBR)[0],         (CM_INLINE|CM_EMPTY),          TY_(ParseEmpty),     NULL           },
 
   /* this must be the final entry */
-  { (TidyTagId)0,        NULL,         0,                    NULL,                       (0),                                           NULL,          NULL           }
+  { (TidyTagId)0,        NULL,           0,                     NULL,                             (0),                           NULL,                NULL           }
 };
 
 static uint tagsHash(ctmbstr s)
diff --git a/src/tags.h b/src/tags.h
index a071fd5..d69f52a 100644
--- a/src/tags.h
+++ b/src/tags.h
@@ -61,8 +61,13 @@ typedef enum
 
 
 /** This typedef describes a function to be used to parse HTML of a Tidy tag.
+ ** @param doc The Tidy document.
+ ** @param node The node being parsed.
+ ** @param mode The GetTokenMode to be used for parsing the node contents.
+ ** @param popStack A flag indicating that we are re-entering this parser, and
+ **   it should restore a state from the stack.
  */
-typedef void (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode );
+typedef Node* (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode, Bool popStack );
 
 
 /** This typedef describes a function be be used to check the attributes
diff --git a/src/tidy-int.h b/src/tidy-int.h
index 52d8e0f..6b399b6 100644
--- a/src/tidy-int.h
+++ b/src/tidy-int.h
@@ -16,6 +16,7 @@
 #include "pprint.h"
 #include "access.h"
 #include "message.h"
+#include "parser.h"
 
 #ifndef MAX
 #define MAX(a,b) (((a) > (b))?(a):(b))
@@ -41,19 +42,20 @@ struct _TidyDocImpl
     Lexer*              lexer;
 
     /* Config + Markup Declarations */
-    TidyConfigImpl          config;
-    TidyTagImpl             tags;
-    TidyAttribImpl          attribs;
-    TidyAccessImpl          access;
-    TidyMutedMessages       muted;
+    TidyConfigImpl           config;
+    TidyTagImpl              tags;
+    TidyAttribImpl           attribs;
+    TidyAccessImpl           access;
+    TidyMutedMessages        muted;
 
     /* The Pretty Print buffer */
-    TidyPrintImpl       pprint;
+    TidyPrintImpl            pprint;
 
     /* I/O */
     StreamIn*                docIn;
     StreamOut*               docOut;
     StreamOut*               errout;
+
     TidyReportFilter         reportFilter;
     TidyReportCallback       reportCallback;
     TidyMessageCallback      messageCallback;
@@ -62,6 +64,8 @@ struct _TidyDocImpl
     TidyConfigChangeCallback pConfigChangeCallback;
     TidyPPProgress           progressCallback;
 
+    TidyParserStack          stack;
+
     /* Parse + Repair Results */
     uint                optionErrors;
     uint                errors;
diff --git a/src/tidylib.c b/src/tidylib.c
index 854d4f9..a439629 100644
--- a/src/tidylib.c
+++ b/src/tidylib.c
@@ -112,6 +112,7 @@ TidyDocImpl* tidyDocCreate( TidyAllocator *allocator )
     TY_(InitAttrs)( doc );
     TY_(InitConfig)( doc );
     TY_(InitPrintBuf)( doc );
+    TY_(InitParserStack)( doc );
 
     /* Set the locale for tidy's output. This both configures
     ** LibTidy to use the environment's locale as well as the
@@ -172,6 +173,7 @@ void          tidyDocRelease( TidyDocImpl* doc )
          *  to determine which hash is to be used, so free it last.
         \*/
         TY_(FreeLexer)( doc );
+        TY_(FreeParserStack)( doc );
         TidyDocFree( doc, doc );
     }
 }