diff --git a/include/tidyplatform.h b/include/tidyplatform.h
index 6fa7be4..2fca4c4 100644
--- a/include/tidyplatform.h
+++ b/include/tidyplatform.h
@@ -611,6 +611,10 @@ extern "C" {
# define TIDY_THREAD_LOCAL __thread
#endif
+#ifndef TIDY_INDENTATION_LIMIT
+# define TIDY_INDENTATION_LIMIT 50
+#endif
+
typedef unsigned char byte;
typedef uint tchar; /* single, full character */
diff --git a/regression_testing/cases/dev-cases/case-005.conf b/regression_testing/cases/dev-cases/case-005.conf
new file mode 100755
index 0000000..eaf1e60
--- /dev/null
+++ b/regression_testing/cases/dev-cases/case-005.conf
@@ -0,0 +1,5 @@
+# Config for test case.
+tidy-mark: no
+indent: yes
+wrap: 999
+input-xml: yes
diff --git a/regression_testing/cases/dev-cases/case-005@0.xml b/regression_testing/cases/dev-cases/case-005@0.xml
new file mode 100644
index 0000000..3427ec2
--- /dev/null
+++ b/regression_testing/cases/dev-cases/case-005@0.xml
@@ -0,0 +1,123 @@
+
+
+
+
+ Gambardella, Matthew
+ XML Developer's Guide
+ Computer
+ 44.95
+ 2000-10-01
+ An in-depth look at creating applications
+ with XML.
+
+
+ Ralls, Kim
+ Midnight Rain
+ Fantasy
+ 5.95
+ 2000-12-16
+ A former architect battles corporate zombies,
+ an evil sorceress, and her own childhood to become queen
+ of the world.
+
+
+ Corets, Eva
+ Maeve Ascendant
+ Fantasy
+ 5.95
+ 2000-11-17
+ After the collapse of a nanotechnology
+ society in England, the young survivors lay the
+ foundation for a new society.
+
+
+ Corets, Eva
+ Oberon's Legacy
+ Fantasy
+ 5.95
+ 2001-03-10
+ In post-apocalypse England, the mysterious
+ agent known only as Oberon helps to create a new life
+ for the inhabitants of London. Sequel to Maeve
+ Ascendant.
+
+
+ Corets, Eva
+ The Sundered Grail
+ Fantasy
+ 5.95
+ 2001-09-10
+ The two daughters of Maeve, half-sisters,
+ battle one another for control of England. Sequel to
+ Oberon's Legacy.
+
+
+ Randall, Cynthia
+ Lover Birds
+ Romance
+ 4.95
+ 2000-09-02
+ When Carla meets Paul at an ornithology
+ conference, tempers fly as feathers get ruffled.
+
+
+ Thurman, Paula
+ Splish Splash
+ Romance
+ 4.95
+ 2000-11-02
+ A deep sea diver finds true love twenty
+ thousand leagues beneath the sea.
+
+
+ Knorr, Stefan
+ Creepy Crawlies
+ Horror
+ 4.95
+ 2000-12-06
+ An anthology of horror stories about roaches,
+ centipedes, scorpions and other insects.
+
+
+ Kress, Peter
+ Paradox Lost
+ Science Fiction
+ 6.95
+ 2000-11-02
+ After an inadvertant trip through a Heisenberg
+ Uncertainty Device, James Salway discovers the problems
+ of being quantum.
+
+
+ O'Brien, Tim
+ Microsoft .NET: The Programming Bible
+ Computer
+ 36.95
+ 2000-12-09
+ Microsoft's .NET initiative is explored in
+ detail in this deep programmer's reference.
+
+
+ O'Brien, Tim
+ MSXML3: A Comprehensive Guide
+ Computer
+ 36.95
+ 2000-12-01
+ The Microsoft MSXML3 parser is covered in
+ detail, with attention to XML DOM interfaces, XSLT processing,
+ SAX and more.
+
+
+ Galos, Mike
+ Visual Studio 7: A Comprehensive Guide
+ Computer
+ 49.95
+ 2001-04-16
+ Microsoft Visual Studio 7 is explored in depth,
+ looking at how Visual Basic, Visual C++, C#, and ASP+ are
+ integrated into a comprehensive development
+ environment.
+
+
diff --git a/regression_testing/cases/legacy-expects/case-480406.txt b/regression_testing/cases/dev-expects/case-005.txt
similarity index 100%
rename from regression_testing/cases/legacy-expects/case-480406.txt
rename to regression_testing/cases/dev-expects/case-005.txt
diff --git a/regression_testing/cases/dev-expects/case-005.xml b/regression_testing/cases/dev-expects/case-005.xml
new file mode 100644
index 0000000..e6d2e71
--- /dev/null
+++ b/regression_testing/cases/dev-expects/case-005.xml
@@ -0,0 +1,102 @@
+
+
+
+
+ Gambardella, Matthew
+ XML Developer's Guide
+ Computer
+ 44.95
+ 2000-10-01
+ An in-depth look at creating applications with XML.
+
+
+ Ralls, Kim
+ Midnight Rain
+ Fantasy
+ 5.95
+ 2000-12-16
+ A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world.
+
+
+ Corets, Eva
+ Maeve Ascendant
+ Fantasy
+ 5.95
+ 2000-11-17
+ After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society.
+
+
+ Corets, Eva
+ Oberon's Legacy
+ Fantasy
+ 5.95
+ 2001-03-10
+ In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant.
+
+
+ Corets, Eva
+ The Sundered Grail
+ Fantasy
+ 5.95
+ 2001-09-10
+ The two daughters of Maeve, half-sisters, battle one another for control of England. Sequel to Oberon's Legacy.
+
+
+ Randall, Cynthia
+ Lover Birds
+ Romance
+ 4.95
+ 2000-09-02
+ When Carla meets Paul at an ornithology conference, tempers fly as feathers get ruffled.
+
+
+ Thurman, Paula
+ Splish Splash
+ Romance
+ 4.95
+ 2000-11-02
+ A deep sea diver finds true love twenty thousand leagues beneath the sea.
+
+
+ Knorr, Stefan
+ Creepy Crawlies
+ Horror
+ 4.95
+ 2000-12-06
+ An anthology of horror stories about roaches, centipedes, scorpions and other insects.
+
+
+ Kress, Peter
+ Paradox Lost
+ Science Fiction
+ 6.95
+ 2000-11-02
+ After an inadvertant trip through a Heisenberg Uncertainty Device, James Salway discovers the problems of being quantum.
+
+
+ O'Brien, Tim
+ Microsoft .NET: The Programming Bible
+ Computer
+ 36.95
+ 2000-12-09
+ Microsoft's .NET initiative is explored in detail in this deep programmer's reference.
+
+
+ O'Brien, Tim
+ MSXML3: A Comprehensive Guide
+ Computer
+ 36.95
+ 2000-12-01
+ The Microsoft MSXML3 parser is covered in detail, with attention to XML DOM interfaces, XSLT processing, SAX and more.
+
+
+ Galos, Mike
+ Visual Studio 7: A Comprehensive Guide
+ Computer
+ 49.95
+ 2001-04-16
+ Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment.
+
+
diff --git a/regression_testing/cases/legacy-cases/case-480406.conf b/regression_testing/cases/legacy-cases/case-480406.conf
deleted file mode 100644
index 50bc5f5..0000000
--- a/regression_testing/cases/legacy-cases/case-480406.conf
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #480406
-input-xml: yes
-output-xml: yes
diff --git a/regression_testing/cases/legacy-cases/case-480406@0.xml b/regression_testing/cases/legacy-cases/case-480406@0.xml
deleted file mode 100644
index 63acee1..0000000
--- a/regression_testing/cases/legacy-cases/case-480406@0.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/regression_testing/cases/legacy-cases/case-634889.conf b/regression_testing/cases/legacy-cases/case-634889.conf
deleted file mode 100644
index 6ca6d8d..0000000
--- a/regression_testing/cases/legacy-cases/case-634889.conf
+++ /dev/null
@@ -1,10 +0,0 @@
-tidy-mark: no
-output-xml: yes
-drop-proprietary-attributes: no
-new-inline-tags: o:lock, o:p, v-f, v-formula, v-formulas,
- v-imagedata, v-path, v-shape, v-shapetype, v-stroke
-new-empty-tags:
-new-blocklevel-tags:
-new-pre-tags:
-wrap-sections: no
-drop-empty-paras: no
diff --git a/regression_testing/cases/legacy-cases/case-634889@1.html b/regression_testing/cases/legacy-cases/case-634889@1.html
deleted file mode 100644
index 1747b79..0000000
--- a/regression_testing/cases/legacy-cases/case-634889@1.html
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
- [ 634889 ] Problem with <o:p> ms word tag
-
-
-
Probably OK, now that ParseTagNames() is fixed.
-
-
-
diff --git a/regression_testing/cases/legacy-cases/case-646946@0.xml b/regression_testing/cases/legacy-cases/case-646946@0.xml
deleted file mode 100644
index 05d6fb5..0000000
--- a/regression_testing/cases/legacy-cases/case-646946@0.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
diff --git a/regression_testing/cases/legacy-expects/case-480406.xml b/regression_testing/cases/legacy-expects/case-480406.xml
deleted file mode 100644
index fc8fb78..0000000
--- a/regression_testing/cases/legacy-expects/case-480406.xml
+++ /dev/null
@@ -1,3 +0,0 @@
-
-
-
diff --git a/regression_testing/cases/legacy-expects/case-634889.html b/regression_testing/cases/legacy-expects/case-634889.html
deleted file mode 100644
index 3157677..0000000
--- a/regression_testing/cases/legacy-expects/case-634889.html
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-[ 634889 ] Problem with <o:p> ms word tag
-
-
-
Probably OK, now that ParseTagNames() is fixed.
-
-
-
diff --git a/regression_testing/cases/legacy-expects/case-634889.txt b/regression_testing/cases/legacy-expects/case-634889.txt
deleted file mode 100644
index 218cfe6..0000000
--- a/regression_testing/cases/legacy-expects/case-634889.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-line 1 column 1 - Warning: missing declaration
-line 7 column 3 - Warning: is not approved by W3C
-Info: Document content looks like XHTML5
-Tidy found 2 warnings and 0 errors!
-
-About HTML Tidy: https://github.com/htacg/tidy-html5
-Bug reports and comments: https://github.com/htacg/tidy-html5/issues
-Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/
-Latest HTML specification: https://html.spec.whatwg.org/multipage/
-Validate your HTML documents: https://validator.w3.org/nu/
-Lobby your company to join the W3C: https://www.w3.org/Consortium
-
-Do you speak a language other than English, or a different variant of
-English? Consider helping us to localize HTML Tidy. For details please see
-https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md
diff --git a/regression_testing/cases/legacy-expects/case-646946.txt b/regression_testing/cases/legacy-expects/case-646946.txt
deleted file mode 100644
index 3425d35..0000000
--- a/regression_testing/cases/legacy-expects/case-646946.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-No warnings or errors were found.
-
-About HTML Tidy: https://github.com/htacg/tidy-html5
-Bug reports and comments: https://github.com/htacg/tidy-html5/issues
-Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/
-Latest HTML specification: https://html.spec.whatwg.org/multipage/
-Validate your HTML documents: https://validator.w3.org/nu/
-Lobby your company to join the W3C: https://www.w3.org/Consortium
-
-Do you speak a language other than English, or a different variant of
-English? Consider helping us to localize HTML Tidy. For details please see
-https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md
diff --git a/regression_testing/cases/legacy-expects/case-646946.xml b/regression_testing/cases/legacy-expects/case-646946.xml
deleted file mode 100644
index bb88916..0000000
--- a/regression_testing/cases/legacy-expects/case-646946.xml
+++ /dev/null
@@ -1,5 +0,0 @@
-
-
-
-
diff --git a/src/parser.c b/src/parser.c
index cb02e04..9844ab2 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -28,6 +28,14 @@
#define showingBodyOnly(doc) (cfgAutoBool(doc,TidyBodyOnly) == TidyYesState) ? yes : no
+/****************************************************************************//*
+ ** MARK: - Forward Declarations
+ ***************************************************************************/
+
+
+static Node* ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode);
+
+
/****************************************************************************//*
** MARK: - Node Operations
***************************************************************************/
@@ -858,7 +866,7 @@ static void growParserStack( TidyDocImpl* doc )
/**
* Indicates whether or not the stack is empty.
*/
-static inline Bool isEmptyParserStack( TidyDocImpl* doc )
+Bool TY_(isEmptyParserStack)( TidyDocImpl* doc )
{
return doc->stack.top < 0;
}
@@ -867,7 +875,7 @@ static inline Bool isEmptyParserStack( TidyDocImpl* doc )
/**
* Peek at the parser memory.
*/
-static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc )
+TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc )
{
return doc->stack.content[doc->stack.top];
}
@@ -877,7 +885,7 @@ static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc )
* Peek at the parser memory "identity" field. This is just a convenience
* to avoid having to create a new struct instance in the caller.
*/
-static inline Parser* peekMemoryIdentity( TidyDocImpl* doc )
+Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc )
{
return doc->stack.content[doc->stack.top].identity;
}
@@ -887,7 +895,7 @@ static inline Parser* peekMemoryIdentity( TidyDocImpl* doc )
* Peek at the parser memory "mode" field. This is just a convenience
* to avoid having to create a new struct instance in the caller.
*/
-static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc )
+GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc )
{
return doc->stack.content[doc->stack.top].mode;
}
@@ -896,12 +904,23 @@ static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc )
/**
* Pop out a parser memory.
*/
-static TidyParserMemory popMemory( TidyDocImpl* doc )
+TidyParserMemory TY_(popMemory)( TidyDocImpl* doc )
{
- if ( !isEmptyParserStack( doc ) )
+ if ( !TY_(isEmptyParserStack)( doc ) )
{
TidyParserMemory data = doc->stack.content[doc->stack.top];
- DEBUG_LOG(SPRTF("\n<--POP %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top - 1 ));
+ DEBUG_LOG(SPRTF("\n"
+ "<--POP original: %s @ %p\n"
+ " reentry: %s @ %p\n"
+ " stack depth: %i @ %p\n"
+ " register 1: %i\n"
+ " register 2: %i\n\n",
+ data.original_node ? data.original_node->element : "none", (void*)data.original_node,
+ data.reentry_node ? data.reentry_node->element : "none", (void*)data.reentry_node,
+ doc->stack.top, (void*)&doc->stack.content[doc->stack.top], /* top is a signed index, hence %i; %p requires void* */
+ data.register_1,
+ data.register_2
+ ));
doc->stack.top = doc->stack.top - 1;
return data;
}
@@ -913,7 +932,7 @@ static TidyParserMemory popMemory( TidyDocImpl* doc )
/**
* Push the parser memory to the stack.
*/
-static void pushMemory( TidyDocImpl* doc, TidyParserMemory data )
+void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data )
{
if ( doc->stack.top == doc->stack.size - 1 )
growParserStack( doc );
@@ -921,7 +940,18 @@ static void pushMemory( TidyDocImpl* doc, TidyParserMemory data )
doc->stack.top++;
doc->stack.content[doc->stack.top] = data;
- DEBUG_LOG(SPRTF("\n-->PUSH %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top ));
+ DEBUG_LOG(SPRTF("\n"
+ "-->PUSH original: %s @ %p\n"
+ " reentry: %s @ %p\n"
+ " stack depth: %i @ %p\n"
+ " register 1: %i\n"
+ " register 2: %i\n\n",
+ data.original_node ? data.original_node->element : "none", (void*)data.original_node,
+ data.reentry_node ? data.reentry_node->element : "none", (void*)data.reentry_node,
+ doc->stack.top, (void*)&doc->stack.content[doc->stack.top], /* top is a signed index, hence %i; %p requires void* */
+ data.register_1,
+ data.register_2
+ ));
}
@@ -938,6 +968,9 @@ static Parser* GetParserForNode( TidyDocImpl* doc, Node *node )
{
Lexer* lexer = doc->lexer;
+ if ( cfgBool( doc, TidyXmlTags ) )
+ return ParseXMLElement;
+
/* [i_a]2 prevent crash for active content (php, asp) docs */
if (!node || node->tag == NULL)
return NULL;
@@ -1008,9 +1041,9 @@ void ParseHTMLWithNode( TidyDocImpl* doc, Node* node )
We weren't given a node, which means this particular leaf is bottomed
out. We'll re-enter the parsers using information from the stack.
*/
- if ( !isEmptyParserStack(doc))
+ if ( !TY_(isEmptyParserStack)(doc))
{
- parser = peekMemoryIdentity(doc);
+ parser = TY_(peekMemoryIdentity)(doc);
if (parser)
{
continue;
@@ -1018,8 +1051,8 @@ void ParseHTMLWithNode( TidyDocImpl* doc, Node* node )
else
{
/* No parser means we're only passing back a parsing mode. */
- mode = peekMemoryMode( doc );
- popMemory( doc );
+ mode = TY_(peekMemoryMode)( doc );
+ TY_(popMemory)( doc );
}
}
@@ -1065,7 +1098,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if ( element == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, because the loop overwrites this immediately. */
mode = memory.reentry_mode;
element = memory.original_node;
@@ -1563,7 +1596,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
memory.reentry_node = node;
memory.reentry_mode = mode;
memory.original_node = element;
- pushMemory(doc, memory);
+ TY_(pushMemory)(doc, memory);
DEBUG_LOG(SPRTF("<<element));
}
return node;
@@ -1621,11 +1654,11 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
*/
if ( body == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
body = memory.original_node;
- checkstack = memory.register_b_1;
- iswhitenode = memory.register_b_2;
+ checkstack = memory.register_1;
+ iswhitenode = memory.register_2;
mode = memory.mode;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseBody with %s\n", node->element));
}
@@ -1691,10 +1724,10 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
memory.identity = TY_(ParseBody);
memory.original_node = body;
memory.reentry_node = node;
- memory.register_b_1 = checkstack;
- memory.register_b_2 = iswhitenode;
+ memory.register_1 = checkstack;
+ memory.register_2 = iswhitenode;
memory.mode = mode;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
return node;
}
@@ -1907,10 +1940,10 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
memory.identity = TY_(ParseBody);
memory.original_node = body;
memory.reentry_node = node;
- memory.register_b_1 = checkstack;
- memory.register_b_2 = iswhitenode;
+ memory.register_1 = checkstack;
+ memory.register_2 = iswhitenode;
memory.mode = mode;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
}
DEBUG_LOG(SPRTF("<<element));
return node;
@@ -1944,7 +1977,7 @@ Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNU
*/
if ( colgroup == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
colgroup = memory.original_node;
mode = memory.mode;
@@ -2034,7 +2067,7 @@ Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNU
memory.original_node = colgroup;
memory.reentry_node = node;
memory.mode = mode;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
}
DEBUG_LOG(SPRTF("<<element));
return node;
@@ -2061,7 +2094,7 @@ Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
if ( field == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
field = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
DEBUG_LOG(SPRTF(">>>Re-Enter ParseDataList with %s\n", node->element));
@@ -2103,7 +2136,7 @@ Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
memory.reentry_mode = IgnoreWhitespace;
TY_(InsertNodeAtEnd)(field, node);
- pushMemory(doc, memory);
+ TY_(pushMemory)(doc, memory);
return node;
}
@@ -2144,7 +2177,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )
if ( list == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
list = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
state = memory.reentry_state;
@@ -2272,7 +2305,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )
memory.original_node = list;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_NODEISCENTER;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -2315,7 +2348,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )
memory.original_node = list;
memory.reentry_node = node;
memory.reentry_state = STATE_INITIAL;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -2402,7 +2435,7 @@ Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNU
*/
if ( frameset == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, because we replace it entering the loop. */
frameset = memory.original_node;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseFrameSet with %s\n", node->element));
@@ -2468,7 +2501,7 @@ Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNU
memory.original_node = frameset;
memory.reentry_node = node;
memory.mode = MixedContent;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -2509,11 +2542,11 @@ Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode
if ( head == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
head = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
- HasTitle = memory.register_b_1;
- HasBase = memory.register_b_2;
+ HasTitle = memory.register_1;
+ HasBase = memory.register_2;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseHead with %s\n", node->element));
}
else
@@ -2622,9 +2655,9 @@ Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode
memory.identity = TY_(ParseHead);
memory.original_node = head;
memory.reentry_node = node;
- memory.register_b_1 = HasTitle;
- memory.register_b_2 = HasBase;
- pushMemory( doc, memory );
+ memory.register_1 = HasTitle;
+ memory.register_2 = HasBase;
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -2684,7 +2717,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
*/
if ( html == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node;
mode = memory.reentry_mode;
state = memory.reentry_state;
@@ -2956,7 +2989,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
memory.reentry_mode = mode;
memory.reentry_state = STATE_PARSE_HEAD_REENTER;
TY_(InsertNodeAtEnd)(html, node);
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -3693,7 +3726,7 @@ Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode
if ( list == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
list = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
DEBUG_LOG(SPRTF(">>>Re-Enter ParseList with %s\n", node->element));
@@ -3859,7 +3892,7 @@ Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode
memory.original_node = list;
memory.reentry_node = node;
memory.mode = IgnoreWhitespace;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4041,11 +4074,11 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
*/
if ( noframes == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, because we replace it entering the loop anyway.*/
noframes = memory.original_node;
state = memory.reentry_state;
- body_seen = memory.register_b_1;
+ body_seen = memory.register_1;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseNoFrames with %s\n", node->element));
}
else
@@ -4123,11 +4156,11 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
memory.original_node = noframes;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_NODEISBODY;
- memory.register_b_1 = lexer->seenEndBody;
+ memory.register_1 = lexer->seenEndBody;
memory.mode = IgnoreWhitespace;
TY_(InsertNodeAtEnd)(noframes, node);
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4168,7 +4201,7 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
memory.reentry_node = node;
memory.mode = IgnoreWhitespace; /*MixedContent*/
memory.reentry_state = STATE_INITIAL;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4220,7 +4253,7 @@ Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
if ( field == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
field = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
DEBUG_LOG(SPRTF(">>>Re-Enter ParseOptGroup with %s\n", node->element));
@@ -4259,7 +4292,7 @@ Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
memory.identity = TY_(ParseOptGroup);
memory.original_node = field;
memory.reentry_node = node;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4293,7 +4326,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
if ( pre == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
pre = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
state = memory.reentry_state;
@@ -4446,7 +4479,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
memory.original_node = pre;
memory.reentry_node = node;
memory.reentry_state = STATE_RENTRY_ACTION;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4488,7 +4521,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
memory.original_node = pre;
memory.reentry_node = node;
memory.reentry_state = STATE_INITIAL;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4548,11 +4581,11 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)
if ( row == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
row = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
state = memory.reentry_state;
- exclude_state = memory.register_b_1;
+ exclude_state = memory.register_1;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseRow with %s\n", node->element));
}
else
@@ -4692,8 +4725,8 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)
memory.original_node = row;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_NOT_ENDTAG;
- memory.register_b_1 = exclude_state;
- pushMemory( doc, memory );
+ memory.register_1 = exclude_state;
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4727,8 +4760,8 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)
memory.original_node = row;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_TD_TH;
- memory.register_b_1 = exclude_state;
- pushMemory( doc, memory );
+ memory.register_1 = exclude_state;
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4792,7 +4825,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU
if ( rowgroup == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
rowgroup = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
state = memory.reentry_state;
@@ -4887,7 +4920,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU
memory.original_node = rowgroup;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_NOT_TEXTNODE;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -4973,7 +5006,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU
memory.original_node = rowgroup;
memory.reentry_node = node;
memory.reentry_state = STATE_INITIAL;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
} break;
@@ -5067,7 +5100,7 @@ Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
if ( field == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
field = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
DEBUG_LOG(SPRTF(">>>Re-Enter ParseSelect with %s\n", node->element));
@@ -5108,7 +5141,7 @@ Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
memory.reentry_node = node;
TY_(InsertNodeAtEnd)(field, node);
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -5144,10 +5177,10 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED
if ( table == NULL )
{
- TidyParserMemory memory = popMemory( doc );
+ TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
table = memory.original_node;
- lexer->exiled = memory.register_b_1;
+ lexer->exiled = memory.register_1;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseTableTag with %s\n", node->element));
}
else
@@ -5219,9 +5252,9 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED
memory.identity = TY_(ParseTableTag);
memory.original_node = table;
memory.reentry_node = node;
- memory.register_b_1 = no; /* later, lexer->exiled = no */
+ memory.register_1 = no; /* later, lexer->exiled = no */
memory.mode = IgnoreWhitespace;
- pushMemory( doc, memory );
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -5292,8 +5325,8 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED
memory.identity = TY_(ParseTableTag);
memory.original_node = table;
memory.reentry_node = node;
- memory.register_b_1 = lexer->exiled;
- pushMemory( doc, memory );
+ memory.register_1 = lexer->exiled;
+ TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<element));
return node;
}
@@ -5457,6 +5490,116 @@ Node* TY_(ParseTitle)( TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mo
}
+/** MARK: ParseXMLElement
+ * Parses the content of the given XML element; a NULL element means re-entry.
+ * Returns a child start tag after saving state, or NULL when this element is done. */
+static Node* ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
+{
+ Lexer* lexer = doc->lexer;
+ Node *node;
+
+ if ( element == NULL )
+ {
+ TidyParserMemory memory = TY_(popMemory)( doc );
+ element = memory.original_node;
+ node = memory.reentry_node; /* The child start tag returned before re-entry. */
+ mode = memory.reentry_mode;
+ TY_(InsertNodeAtEnd)(element, node); /* The only re-entry action needed. */
+ }
+ else
+ {
+ /* switch to Preformatted mode if this element preserves whitespace */
+ if ( TY_(XMLPreserveWhiteSpace)(doc, element) )
+ mode = Preformatted;
+
+ /* deal with comments etc. */
+ InsertMisc( &doc->root, element);
+
+ /* we shouldn't have plain text at this point. */
+ if (TY_(nodeIsText)(element))
+ {
+ TY_(Report)(doc, &doc->root, element, DISCARDING_UNEXPECTED);
+ TY_(FreeNode)( doc, element);
+ return NULL;
+ }
+ }
+ while ((node = TY_(GetToken)(doc, mode)) != NULL)
+ {
+ if (node->type == EndTag &&
+ node->element && element->element &&
+ TY_(tmbstrcmp)(node->element, element->element) == 0)
+ {
+ TY_(FreeNode)( doc, node);
+ element->closed = yes;
+ break;
+ }
+
+ /* discard unexpected end tags */
+ if (node->type == EndTag)
+ {
+ if (element) /* NOTE(review): element is dereferenced above, so this looks always true */
+ TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_IN);
+ else
+ TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_ERR);
+
+ TY_(FreeNode)( doc, node);
+ continue;
+ }
+
+ /* on a start tag, save our state and hand the child back; re-entry resumes this loop */
+ if (node->type == StartTag)
+ {
+ TidyParserMemory memory = {0};
+ memory.identity = ParseXMLElement;
+ memory.original_node = element;
+ memory.reentry_node = node;
+ memory.reentry_mode = mode;
+ TY_(pushMemory)( doc, memory );
+ return node;
+ }
+
+ TY_(InsertNodeAtEnd)(element, node);
+ } /* while */
+
+ /*
+ if first child is text then trim initial space and
+ delete text node if it is empty.
+ */
+
+ node = element->content;
+
+ if (TY_(nodeIsText)(node) && mode != Preformatted)
+ {
+ if ( lexer->lexbuf[node->start] == ' ' )
+ {
+ node->start++;
+
+ if (node->start >= node->end)
+ TY_(DiscardElement)( doc, node );
+ }
+ }
+
+ /*
+ if last child is text then trim final space and
+ delete the text node if it is empty
+ */
+
+ node = element->last;
+
+ if (TY_(nodeIsText)(node) && mode != Preformatted)
+ {
+ if ( lexer->lexbuf[node->end - 1] == ' ' )
+ {
+ node->end--;
+
+ if (node->start >= node->end)
+ TY_(DiscardElement)( doc, node );
+ }
+ }
+ return NULL;
+}
+
+
/***************************************************************************//*
** MARK: - Post-Parse Operations
***************************************************************************/
@@ -6101,87 +6244,6 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
}
-/** MARK: TY_(ParseXMLElement)
- * Parses the given XML element.
- */
-static void ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
-{
- Lexer* lexer = doc->lexer;
- Node *node;
-
- /* if node is pre or has xml:space="preserve" then do so */
-
- if ( TY_(XMLPreserveWhiteSpace)(doc, element) )
- mode = Preformatted;
-
- while ((node = TY_(GetToken)(doc, mode)) != NULL)
- {
- if (node->type == EndTag &&
- node->element && element->element &&
- TY_(tmbstrcmp)(node->element, element->element) == 0)
- {
- TY_(FreeNode)( doc, node);
- element->closed = yes;
- break;
- }
-
- /* discard unexpected end tags */
- if (node->type == EndTag)
- {
- if (element)
- TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_IN);
- else
- TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_ERR);
-
- TY_(FreeNode)( doc, node);
- continue;
- }
-
- /* parse content on seeing start tag */
- if (node->type == StartTag)
- ParseXMLElement( doc, node, mode );
-
- TY_(InsertNodeAtEnd)(element, node);
- }
-
- /*
- if first child is text then trim initial space and
- delete text node if it is empty.
- */
-
- node = element->content;
-
- if (TY_(nodeIsText)(node) && mode != Preformatted)
- {
- if ( lexer->lexbuf[node->start] == ' ' )
- {
- node->start++;
-
- if (node->start >= node->end)
- TY_(DiscardElement)( doc, node );
- }
- }
-
- /*
- if last child is text then trim final space and
- delete the text node if it is empty
- */
-
- node = element->last;
-
- if (TY_(nodeIsText)(node) && mode != Preformatted)
- {
- if ( lexer->lexbuf[node->end - 1] == ' ' )
- {
- node->end--;
-
- if (node->start >= node->end)
- TY_(DiscardElement)( doc, node );
- }
- }
-}
-
-
/** MARK: TY_(ParseXMLDocument)
* Parses the document using Tidy's XML parser.
*/
@@ -6232,7 +6294,7 @@ void TY_(ParseXMLDocument)(TidyDocImpl* doc)
if (node->type == StartTag)
{
TY_(InsertNodeAtEnd)( &doc->root, node );
- ParseXMLElement( doc, node, IgnoreWhitespace );
+ ParseHTMLWithNode( doc, node );
continue;
}
diff --git a/src/parser.h b/src/parser.h
index 0ccec79..8980372 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -55,8 +55,8 @@ typedef struct _TidyParserMemory
GetTokenMode reentry_mode; /**< The token mode to use when re-entering. */
int reentry_state; /**< State to set during re-entry. Defined locally in each parser. */
GetTokenMode mode; /**< The caller will peek at this value to get the correct mode. */
- Bool register_b_1; /**< Local variable storage. */
- Bool register_b_2; /**< Local variable storage. */
+ int register_1; /**< Local variable storage. */
+ int register_2; /**< Local variable storage. */
} TidyParserMemory;
@@ -86,6 +86,44 @@ void TY_(InitParserStack)( TidyDocImpl* doc );
void TY_(FreeParserStack)( TidyDocImpl* doc );
+/**
+ * Indicates whether or not the stack is empty.
+ */
+Bool TY_(isEmptyParserStack)( TidyDocImpl* doc );
+
+
+/**
+ * Peek at the parser memory.
+ */
+TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc );
+
+
+/**
+ * Peek at the parser memory "identity" field. This is just a convenience
+ * to avoid having to create a new struct instance in the caller.
+ */
+Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc );
+
+
+/**
+ * Peek at the parser memory "mode" field. This is just a convenience
+ * to avoid having to create a new struct instance in the caller.
+ */
+GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc );
+
+
+/**
+ * Pop out a parser memory.
+ */
+TidyParserMemory TY_(popMemory)( TidyDocImpl* doc );
+
+
+/**
+ * Push the parser memory to the stack.
+ */
+void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data );
+
+
/**
* Is used to perform a node integrity check recursively after parsing
* an HTML or XML document.
diff --git a/src/pprint.c b/src/pprint.c
index c433db3..e123c34 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -16,6 +16,7 @@
#include "entities.h"
#include "tmbstr.h"
#include "utf8.h"
+#include "sprtf.h"
/* *** FOR DEBUG ONLY *** */
/* #define DEBUG_PPRINT */
@@ -2330,102 +2331,152 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
}
}
+/* Pretty-prints an XML tree iteratively; open containers wait on the parser memory stack for their end tags. */
void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
{
Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
- if (node == NULL)
- return;
-
- if (doc->progressCallback)
- {
- doc->progressCallback( tidyImplToDoc(doc), node->line, node->column, doc->pprint.line + 1 );
- }
+ Node* next = NULL;
- if ( node->type == TextNode)
+ while ( node )
{
- PPrintText( doc, mode, indent, node );
- }
- else if ( node->type == CommentTag )
- {
- PCondFlushLineSmart( doc, indent );
- PPrintComment( doc, indent, node);
- /* PCondFlushLine( doc, 0 ); */
- }
- else if ( node->type == RootNode )
- {
- Node *content;
- for ( content = node->content;
- content != NULL;
- content = content->next )
- TY_(PPrintXMLTree)( doc, mode, indent, content );
- }
- else if ( node->type == DocTypeTag )
- PPrintDocType( doc, indent, node );
- else if ( node->type == ProcInsTag )
- PPrintPI( doc, indent, node );
- else if ( node->type == XmlDecl )
- PPrintXmlDecl( doc, indent, node );
- else if ( node->type == CDATATag )
- PPrintCDATA( doc, indent, node );
- else if ( node->type == SectionTag )
- PPrintSection( doc, indent, node );
- else if ( node->type == AspTag )
- PPrintAsp( doc, indent, node );
- else if ( node->type == JsteTag)
- PPrintJste( doc, indent, node );
- else if ( node->type == PhpTag)
- PPrintPhp( doc, indent, node );
- else if ( TY_(nodeHasCM)(node, CM_EMPTY) ||
- (node->type == StartEndTag && !xhtmlOut) )
- {
- PCondFlushLineSmart( doc, indent );
- PPrintTag( doc, mode, indent, node );
- /* TY_(PFlushLine)( doc, indent ); */
- }
- else /* some kind of container element */
- {
- uint spaces = cfg( doc, TidyIndentSpaces );
- Node *content;
- Bool mixed = no;
- uint cindent;
-
- for ( content = node->content; content; content = content->next )
+ next = node->next;
+
+ if (doc->progressCallback)
{
- if ( TY_(nodeIsText)(content) )
+ doc->progressCallback( tidyImplToDoc(doc), node->line, node->column, doc->pprint.line + 1 );
+ }
+
+ if ( node->type == TextNode)
+ {
+ PPrintText( doc, mode, indent, node );
+ }
+ else if ( node->type == RootNode )
+ {
+ node = node->content; /* NULL for an empty root: ends the loop instead of spinning forever */
+ continue;
+ }
+ else if ( node->type == CommentTag )
+ {
+ PCondFlushLineSmart( doc, indent );
+ PPrintComment( doc, indent, node);
+ /* PCondFlushLine( doc, 0 ); */
+ }
+ else if ( node->type == DocTypeTag )
+ PPrintDocType( doc, indent, node );
+ else if ( node->type == ProcInsTag )
+ PPrintPI( doc, indent, node );
+ else if ( node->type == XmlDecl )
+ PPrintXmlDecl( doc, indent, node );
+ else if ( node->type == CDATATag )
+ PPrintCDATA( doc, indent, node );
+ else if ( node->type == SectionTag )
+ PPrintSection( doc, indent, node );
+ else if ( node->type == AspTag )
+ PPrintAsp( doc, indent, node );
+ else if ( node->type == JsteTag)
+ PPrintJste( doc, indent, node );
+ else if ( node->type == PhpTag)
+ PPrintPhp( doc, indent, node );
+ else if ( TY_(nodeHasCM)(node, CM_EMPTY) ||
+ (node->type == StartEndTag && !xhtmlOut) )
+ {
+ PCondFlushLineSmart( doc, indent );
+ PPrintTag( doc, mode, indent, node );
+ /* TY_(PFlushLine)( doc, indent ); */
+ }
+ else if ( node->type != RootNode ) /* some kind of container element */
+ {
+ TidyParserMemory memory = {0};
+ uint spaces = cfg( doc, TidyIndentSpaces );
+ Node *content;
+ Bool mixed = no;
+ uint cindent;
+
+ for ( content = node->content; content; content = content->next )
{
- mixed = yes;
- break;
+ if ( TY_(nodeIsText)(content) )
+ {
+ mixed = yes;
+ break;
+ }
}
- }
- PCondFlushLineSmart( doc, indent );
-
- if ( TY_(XMLPreserveWhiteSpace)(doc, node) )
- {
- indent = 0;
- mixed = no;
- cindent = 0;
- }
- else if (mixed)
- cindent = indent;
- else
- cindent = indent + spaces;
-
- PPrintTag( doc, mode, indent, node );
- if ( !mixed && node->content )
- TY_(PFlushLineSmart)( doc, cindent );
-
- for ( content = node->content; content; content = content->next )
- TY_(PPrintXMLTree)( doc, mode, cindent, content );
-
- if ( !mixed && node->content )
PCondFlushLineSmart( doc, indent );
- PPrintEndTag( doc, mode, indent, node );
- /* PCondFlushLine( doc, indent ); */
- }
+ if ( TY_(XMLPreserveWhiteSpace)(doc, node) )
+ {
+ indent = 0;
+ mixed = no;
+ cindent = 0;
+ }
+ else if (mixed)
+ cindent = indent;
+ else
+ cindent = indent + spaces;
+
+ PPrintTag( doc, mode, indent, node );
+ if ( !mixed && node->content )
+ TY_(PFlushLineSmart)( doc, cindent );
+
+ memory.original_node = node;
+ memory.reentry_node = next;
+ memory.register_1 = mixed;
+ memory.register_2 = indent; /* NOTE(review): already zeroed for preserve-whitespace nodes, so later siblings resume at 0 - confirm intended */
+ TY_(pushMemory)(doc, memory);
+ next = NULL; /* the sibling now lives in memory.reentry_node; an empty container must be closed (popped) before moving on to it */
+
+ /* Prevent infinite indentation. Seriously, at what point is
+ anyone going to read a file with infinite indentation? It
+ slows down rendering for arbitrarily-deep test cases that
+ are only meant to crash Tidy in the first place. Let's
+ consider whether to remove this limitation, lower it,
+ increase it, or add a new configuration option to control
+ it, or even emit an info-level message about it.
+ */
+ if (indent < TIDY_INDENTATION_LIMIT * spaces)
+ indent = cindent;
+
+ if (node->content)
+ {
+ node = node->content;
+ continue;
+ }
+ }
+
+ if (next)
+ {
+ node = next;
+ continue;
+ }
+
+ if ( TY_(isEmptyParserStack)(doc) == no )
+ {
+ /* It's possible that the reentry_node is null, because we
+ only pushed this record as a marker for the end tag while
+ there was no next node. Thus the loop will pop until we have
+ what we need. This also closes multiple end tags.
+ */
+ do {
+ TidyParserMemory memory = TY_(popMemory)(doc);
+ Node* close_node = memory.original_node;
+ Bool mixed = memory.register_1;
+ indent = memory.register_2;
+
+ if ( !mixed && close_node->content )
+ PCondFlushLineSmart( doc, indent );
+
+ PPrintEndTag( doc, mode, indent, close_node );
+ /* PCondFlushLine( doc, indent ); */
+
+ node = memory.reentry_node;
+ } while ( node == NULL && TY_(isEmptyParserStack)(doc) == no );
+ continue;
+ }
+ node = NULL;
+ } /* while */
}
+
/*
* local variables:
* mode: c
diff --git a/src/tidylib.c b/src/tidylib.c
index 5907a96..48cf2cc 100644
--- a/src/tidylib.c
+++ b/src/tidylib.c
@@ -2048,16 +2048,24 @@ void dbg_show_node( TidyDocImpl* doc, Node *node, int caller, int indent )
SPRTF("\n");
}
-void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent )
+/* Shows every node in the sibling list starting at `node`, and
+ recursively each node's content one indent level deeper.
+ */
+void dbg_show_all_nodes_loop( TidyDocImpl* doc, Node *node, int indent )
{
- while (node)
+ for ( ; node; node = node->next )
{
dbg_show_node( doc, node, 0, indent );
- dbg_show_all_nodes( doc, node->content, indent + 1 );
- node = node->next;
+ dbg_show_all_nodes_loop( doc, node->content, indent + 1 );
}
}
+void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent )
+{
+ dbg_show_node( doc, node, 0, indent );
+ dbg_show_all_nodes_loop( doc, node->content, indent + 1 );
+}
+}
+
#endif
int tidyDocCleanAndRepair( TidyDocImpl* doc )