Merge pull request #988 from htacg/xml_recurse

The XML Parser and XML Pretty Printer are now non-recursive.
This commit is contained in:
Jim Derry 2021-08-17 07:32:07 -04:00 committed by GitHub
commit 845e55e5d4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 639 additions and 322 deletions

View file

@ -611,6 +611,10 @@ extern "C" {
# define TIDY_THREAD_LOCAL __thread
#endif
#ifndef TIDY_INDENTATION_LIMIT
# define TIDY_INDENTATION_LIMIT 50
#endif
typedef unsigned char byte;
typedef uint tchar; /* single, full character */

View file

@ -0,0 +1,5 @@
# Config for test case.
tidy-mark: no
indent: yes
wrap: 999
input-xml: yes

View file

@ -0,0 +1,123 @@
<!--
This is a sample XML file.
-->
<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
<title>Oberon's Legacy</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-03-10</publish_date>
<description>In post-apocalypse England, the mysterious
agent known only as Oberon helps to create a new life
for the inhabitants of London. Sequel to Maeve
Ascendant.</description>
</book>
<book id="bk105">
<author>Corets, Eva</author>
<title>The Sundered Grail</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-09-10</publish_date>
<description>The two daughters of Maeve, half-sisters,
battle one another for control of England. Sequel to
Oberon's Legacy.</description>
</book>
<book id="bk106">
<author>Randall, Cynthia</author>
<title>Lover Birds</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-09-02</publish_date>
<description>When Carla meets Paul at an ornithology
conference, tempers fly as feathers get ruffled.</description>
</book>
<book id="bk107">
<author>Thurman, Paula</author>
<title>Splish Splash</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-11-02</publish_date>
<description>A deep sea diver finds true love twenty
thousand leagues beneath the sea.</description>
</book>
<book id="bk108">
<author>Knorr, Stefan</author>
<title>Creepy Crawlies</title>
<genre>Horror</genre>
<price>4.95</price>
<publish_date>2000-12-06</publish_date>
<description>An anthology of horror stories about roaches,
centipedes, scorpions and other insects.</description>
</book>
<book id="bk109">
<author>Kress, Peter</author>
<title>Paradox Lost</title>
<genre>Science Fiction</genre>
<price>6.95</price>
<publish_date>2000-11-02</publish_date>
<description>After an inadvertant trip through a Heisenberg
Uncertainty Device, James Salway discovers the problems
of being quantum.</description>
</book>
<book id="bk110">
<author>O'Brien, Tim</author>
<title>Microsoft .NET: The Programming Bible</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-09</publish_date>
<description>Microsoft's .NET initiative is explored in
detail in this deep programmer's reference.</description>
</book>
<book id="bk111">
<author>O'Brien, Tim</author>
<title>MSXML3: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-01</publish_date>
<description>The Microsoft MSXML3 parser is covered in
detail, with attention to XML DOM interfaces, XSLT processing,
SAX and more.</description>
</book>
<book id="bk112">
<author>Galos, Mike</author>
<title>Visual Studio 7: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>49.95</price>
<publish_date>2001-04-16</publish_date>
<description>Microsoft Visual Studio 7 is explored in depth,
looking at how Visual Basic, Visual C++, C#, and ASP+ are
integrated into a comprehensive development
environment.</description>
</book>
</catalog>

View file

@ -0,0 +1,102 @@
<?xml version="1.0"?>
<!--
This is a sample XML file.
-->
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society.</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
<title>Oberon's Legacy</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-03-10</publish_date>
<description>In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant.</description>
</book>
<book id="bk105">
<author>Corets, Eva</author>
<title>The Sundered Grail</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-09-10</publish_date>
<description>The two daughters of Maeve, half-sisters, battle one another for control of England. Sequel to Oberon's Legacy.</description>
</book>
<book id="bk106">
<author>Randall, Cynthia</author>
<title>Lover Birds</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-09-02</publish_date>
<description>When Carla meets Paul at an ornithology conference, tempers fly as feathers get ruffled.</description>
</book>
<book id="bk107">
<author>Thurman, Paula</author>
<title>Splish Splash</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-11-02</publish_date>
<description>A deep sea diver finds true love twenty thousand leagues beneath the sea.</description>
</book>
<book id="bk108">
<author>Knorr, Stefan</author>
<title>Creepy Crawlies</title>
<genre>Horror</genre>
<price>4.95</price>
<publish_date>2000-12-06</publish_date>
<description>An anthology of horror stories about roaches, centipedes, scorpions and other insects.</description>
</book>
<book id="bk109">
<author>Kress, Peter</author>
<title>Paradox Lost</title>
<genre>Science Fiction</genre>
<price>6.95</price>
<publish_date>2000-11-02</publish_date>
<description>After an inadvertant trip through a Heisenberg Uncertainty Device, James Salway discovers the problems of being quantum.</description>
</book>
<book id="bk110">
<author>O'Brien, Tim</author>
<title>Microsoft .NET: The Programming Bible</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-09</publish_date>
<description>Microsoft's .NET initiative is explored in detail in this deep programmer's reference.</description>
</book>
<book id="bk111">
<author>O'Brien, Tim</author>
<title>MSXML3: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-01</publish_date>
<description>The Microsoft MSXML3 parser is covered in detail, with attention to XML DOM interfaces, XSLT processing, SAX and more.</description>
</book>
<book id="bk112">
<author>Galos, Mike</author>
<title>Visual Studio 7: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>49.95</price>
<publish_date>2001-04-16</publish_date>
<description>Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment.</description>
</book>
</catalog>

View file

@ -1,3 +0,0 @@
// Tidy configuration file for bug #480406
input-xml: yes
output-xml: yes

View file

@ -1,4 +0,0 @@
<?xml version="1.0"?>
<!-- [ #480406 ] Single document element discarded - use "-xml" on command line -->
<test />

View file

@ -1,10 +0,0 @@
tidy-mark: no
output-xml: yes
drop-proprietary-attributes: no
new-inline-tags: o:lock, o:p, v-f, v-formula, v-formulas,
v-imagedata, v-path, v-shape, v-shapetype, v-stroke
new-empty-tags:
new-blocklevel-tags:
new-pre-tags:
wrap-sections: no
drop-empty-paras: no

View file

@ -1,9 +0,0 @@
<html>
<head>
<title>[ 634889 ] Problem with &lt;o:p&gt; ms word tag</title>
</head>
<body>
<p>Probably OK, now that ParseTagNames() is fixed.</p>
<o:p></o:p>
</body>
</html>

View file

@ -1,6 +0,0 @@
<?xml version="1.0" standalone="yes"?>
<!-- [ 646946 ] Bad doctype guessing in XML mode -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg width="1800" height="1500">
</svg>

Before

Width:  |  Height:  |  Size: 247 B

View file

@ -1,3 +0,0 @@
<?xml version="1.0"?>
<!-- [ #480406 ] Single document element discarded - use "-xml" on command line -->
<test />

View file

@ -1,9 +0,0 @@
<html>
<head>
<title>[ 634889 ] Problem with &lt;o:p&gt; ms word tag</title>
</head>
<body>
<p>Probably OK, now that ParseTagNames() is fixed.</p>
<o:p></o:p>
</body>
</html>

View file

@ -1,15 +0,0 @@
line 1 column 1 - Warning: missing <!DOCTYPE> declaration
line 7 column 3 - Warning: <o:p> is not approved by W3C
Info: Document content looks like XHTML5
Tidy found 2 warnings and 0 errors!
About HTML Tidy: https://github.com/htacg/tidy-html5
Bug reports and comments: https://github.com/htacg/tidy-html5/issues
Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/
Latest HTML specification: https://html.spec.whatwg.org/multipage/
Validate your HTML documents: https://validator.w3.org/nu/
Lobby your company to join the W3C: https://www.w3.org/Consortium
Do you speak a language other than English, or a different variant of
English? Consider helping us to localize HTML Tidy. For details please see
https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md

View file

@ -1,12 +0,0 @@
No warnings or errors were found.
About HTML Tidy: https://github.com/htacg/tidy-html5
Bug reports and comments: https://github.com/htacg/tidy-html5/issues
Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/
Latest HTML specification: https://html.spec.whatwg.org/multipage/
Validate your HTML documents: https://validator.w3.org/nu/
Lobby your company to join the W3C: https://www.w3.org/Consortium
Do you speak a language other than English, or a different variant of
English? Consider helping us to localize HTML Tidy. For details please see
https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md

View file

@ -1,5 +0,0 @@
<?xml version="1.0" standalone="yes"?>
<!-- [ 646946 ] Bad doctype guessing in XML mode -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg width="1800" height="1500"></svg>

Before

Width:  |  Height:  |  Size: 238 B

View file

@ -28,6 +28,14 @@
/* Evaluates to `yes` when the TidyBodyOnly option of `doc` equals
 * TidyYesState, and to `no` otherwise. The argument and the whole expansion
 * are parenthesized so the macro behaves as a single value when combined
 * with other operators, e.g. `showingBodyOnly(doc) && x`. */
#define showingBodyOnly(doc) ((cfgAutoBool((doc), TidyBodyOnly) == TidyYesState) ? yes : no)
/****************************************************************************//*
** MARK: - Forward Declarations
***************************************************************************/
static Node* ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode);
/****************************************************************************//*
** MARK: - Node Operations
***************************************************************************/
@ -858,7 +866,7 @@ static void growParserStack( TidyDocImpl* doc )
/**
* Indicates whether or not the parser stack is empty.
* The stack counts as empty while `doc->stack.top` is negative.
*/
static inline Bool isEmptyParserStack( TidyDocImpl* doc )
Bool TY_(isEmptyParserStack)( TidyDocImpl* doc )
{
return doc->stack.top < 0;
}
@ -867,7 +875,7 @@ static inline Bool isEmptyParserStack( TidyDocImpl* doc )
/**
* Peek at the parser memory: returns a copy of the entry currently at
* `doc->stack.top` and leaves the stack unchanged.
* No emptiness check is made here, so the stack must hold at least one
* entry when this is called.
*/
static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc )
TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc )
{
return doc->stack.content[doc->stack.top];
}
@ -877,7 +885,7 @@ static inline FUNC_UNUSED TidyParserMemory peekMemory( TidyDocImpl* doc )
* Peek at the parser memory "identity" field. This is just a convenience
* to avoid having to create a new struct instance in the caller.
*/
static inline Parser* peekMemoryIdentity( TidyDocImpl* doc )
Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc )
{
return doc->stack.content[doc->stack.top].identity;
}
@ -887,7 +895,7 @@ static inline Parser* peekMemoryIdentity( TidyDocImpl* doc )
* Peek at the parser memory "mode" field. This is just a convenience
* to avoid having to create a new struct instance in the caller.
*/
static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc )
GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc )
{
return doc->stack.content[doc->stack.top].mode;
}
@ -896,12 +904,23 @@ static GetTokenMode inline peekMemoryMode( TidyDocImpl* doc )
/**
* Pop out a parser memory.
*/
static TidyParserMemory popMemory( TidyDocImpl* doc )
TidyParserMemory TY_(popMemory)( TidyDocImpl* doc )
{
if ( !isEmptyParserStack( doc ) )
if ( !TY_(isEmptyParserStack)( doc ) )
{
TidyParserMemory data = doc->stack.content[doc->stack.top];
DEBUG_LOG(SPRTF("\n<--POP %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top - 1 ));
DEBUG_LOG(SPRTF("\n"
"<--POP original: %s @ %p\n"
" reentry: %s @ %p\n"
" stack depth: %lu @ %p\n"
" register 1: %i\n"
" register 2: %i\n\n",
data.original_node ? data.original_node->element : "none", data.original_node,
data.reentry_node ? data.reentry_node->element : "none", data.reentry_node,
doc->stack.top, &doc->stack.content[doc->stack.top],
data.register_1,
data.register_2
));
doc->stack.top = doc->stack.top - 1;
return data;
}
@ -913,7 +932,7 @@ static TidyParserMemory popMemory( TidyDocImpl* doc )
/**
* Push the parser memory to the stack.
*/
static void pushMemory( TidyDocImpl* doc, TidyParserMemory data )
void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data )
{
if ( doc->stack.top == doc->stack.size - 1 )
growParserStack( doc );
@ -921,7 +940,18 @@ static void pushMemory( TidyDocImpl* doc, TidyParserMemory data )
doc->stack.top++;
doc->stack.content[doc->stack.top] = data;
DEBUG_LOG(SPRTF("\n-->PUSH %s pointed to is %p,\t memory is %p (size is %lu), depth is %i\n", data.reentry_node ? data.reentry_node->element : NULL, data.reentry_node, &doc->stack.content[doc->stack.top], sizeof(TidyParserMemory), doc->stack.top ));
DEBUG_LOG(SPRTF("\n"
"-->PUSH original: %s @ %p\n"
" reentry: %s @ %p\n"
" stack depth: %lu @ %p\n"
" register 1: %i\n"
" register 2: %i\n\n",
data.original_node ? data.original_node->element : "none", data.original_node,
data.reentry_node ? data.reentry_node->element : "none", data.reentry_node,
doc->stack.top, &doc->stack.content[doc->stack.top],
data.register_1,
data.register_2
));
}
@ -938,6 +968,9 @@ static Parser* GetParserForNode( TidyDocImpl* doc, Node *node )
{
Lexer* lexer = doc->lexer;
if ( cfgBool( doc, TidyXmlTags ) )
return ParseXMLElement;
/* [i_a]2 prevent crash for active content (php, asp) docs */
if (!node || node->tag == NULL)
return NULL;
@ -1008,9 +1041,9 @@ void ParseHTMLWithNode( TidyDocImpl* doc, Node* node )
We weren't given a node, which means this particular leaf is bottomed
out. We'll re-enter the parsers using information from the stack.
*/
if ( !isEmptyParserStack(doc))
if ( !TY_(isEmptyParserStack)(doc))
{
parser = peekMemoryIdentity(doc);
parser = TY_(peekMemoryIdentity)(doc);
if (parser)
{
continue;
@ -1018,8 +1051,8 @@ void ParseHTMLWithNode( TidyDocImpl* doc, Node* node )
else
{
/* No parser means we're only passing back a parsing mode. */
mode = peekMemoryMode( doc );
popMemory( doc );
mode = TY_(peekMemoryMode)( doc );
TY_(popMemory)( doc );
}
}
@ -1065,7 +1098,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
if ( element == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, because the loop overwrites this immediately. */
mode = memory.reentry_mode;
element = memory.original_node;
@ -1563,7 +1596,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
memory.reentry_node = node;
memory.reentry_mode = mode;
memory.original_node = element;
pushMemory(doc, memory);
TY_(pushMemory)(doc, memory);
DEBUG_LOG(SPRTF("<<<Leave ParseBlock to return node %s\n", node->element));
}
return node;
@ -1621,11 +1654,11 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
*/
if ( body == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
body = memory.original_node;
checkstack = memory.register_b_1;
iswhitenode = memory.register_b_2;
checkstack = memory.register_1;
iswhitenode = memory.register_2;
mode = memory.mode;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseBody with %s\n", node->element));
}
@ -1691,10 +1724,10 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
memory.identity = TY_(ParseBody);
memory.original_node = body;
memory.reentry_node = node;
memory.register_b_1 = checkstack;
memory.register_b_2 = iswhitenode;
memory.register_1 = checkstack;
memory.register_2 = iswhitenode;
memory.mode = mode;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
return node;
}
@ -1907,10 +1940,10 @@ Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
memory.identity = TY_(ParseBody);
memory.original_node = body;
memory.reentry_node = node;
memory.register_b_1 = checkstack;
memory.register_b_2 = iswhitenode;
memory.register_1 = checkstack;
memory.register_2 = iswhitenode;
memory.mode = mode;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
}
DEBUG_LOG(SPRTF("<<<Exiting ParseBody with a node to parse: %s\n", node->element));
return node;
@ -1944,7 +1977,7 @@ Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNU
*/
if ( colgroup == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
colgroup = memory.original_node;
mode = memory.mode;
@ -2034,7 +2067,7 @@ Node* TY_(ParseColGroup)( TidyDocImpl* doc, Node *colgroup, GetTokenMode ARG_UNU
memory.original_node = colgroup;
memory.reentry_node = node;
memory.mode = mode;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
}
DEBUG_LOG(SPRTF("<<<Exiting ParseColGroup with a node to parse: %s\n", node->element));
return node;
@ -2061,7 +2094,7 @@ Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
if ( field == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
field = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
DEBUG_LOG(SPRTF(">>>Re-Enter ParseDataList with %s\n", node->element));
@ -2103,7 +2136,7 @@ Node* TY_(ParseDatalist)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
memory.reentry_mode = IgnoreWhitespace;
TY_(InsertNodeAtEnd)(field, node);
pushMemory(doc, memory);
TY_(pushMemory)(doc, memory);
return node;
}
@ -2144,7 +2177,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )
if ( list == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
list = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
state = memory.reentry_state;
@ -2272,7 +2305,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )
memory.original_node = list;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_NODEISCENTER;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseDefList 3 with a node to parse: %s\n", node->element));
return node;
}
@ -2315,7 +2348,7 @@ Node* TY_(ParseDefList)( TidyDocImpl* doc, Node *list, GetTokenMode mode )
memory.original_node = list;
memory.reentry_node = node;
memory.reentry_state = STATE_INITIAL;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseDefList 4 with a node to parse: %s\n", node->element));
return node;
}
@ -2402,7 +2435,7 @@ Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNU
*/
if ( frameset == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, because we replace it entering the loop. */
frameset = memory.original_node;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseFrameSet with %s\n", node->element));
@ -2468,7 +2501,7 @@ Node* TY_(ParseFrameSet)( TidyDocImpl* doc, Node *frameset, GetTokenMode ARG_UNU
memory.original_node = frameset;
memory.reentry_node = node;
memory.mode = MixedContent;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseFrameSet with a node to parse: %s\n", node->element));
return node;
}
@ -2509,11 +2542,11 @@ Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode
if ( head == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
head = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
HasTitle = memory.register_b_1;
HasBase = memory.register_b_2;
HasTitle = memory.register_1;
HasBase = memory.register_2;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseHead with %s\n", node->element));
}
else
@ -2622,9 +2655,9 @@ Node* TY_(ParseHead)( TidyDocImpl* doc, Node *head, GetTokenMode ARG_UNUSED(mode
memory.identity = TY_(ParseHead);
memory.original_node = head;
memory.reentry_node = node;
memory.register_b_1 = HasTitle;
memory.register_b_2 = HasBase;
pushMemory( doc, memory );
memory.register_1 = HasTitle;
memory.register_2 = HasBase;
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseHead with a node to parse: %s\n", node->element));
return node;
}
@ -2684,7 +2717,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
*/
if ( html == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node;
mode = memory.reentry_mode;
state = memory.reentry_state;
@ -2956,7 +2989,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
memory.reentry_mode = mode;
memory.reentry_state = STATE_PARSE_HEAD_REENTER;
TY_(InsertNodeAtEnd)(html, node);
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseHTML at STATE_PARSE_HEAD, count: %d, depth %d\n", parser_count, --parser_depth));
return node;
} break;
@ -2981,7 +3014,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
memory.reentry_mode = mode;
memory.reentry_state = STATE_COMPLETE;
TY_(InsertNodeAtEnd)(html, node);
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseHTML at STATE_PARSE_BODY, count: %d, depth %d\n", parser_count, --parser_depth));
return node;
} break;
@ -3000,7 +3033,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
memory.reentry_node = frameset;
memory.reentry_mode = mode;
memory.reentry_state = STATE_PARSE_NOFRAMES_REENTER;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseHTML at STATE_PARSE_NOFRAMES, count: %d, depth %d\n", parser_count, --parser_depth));
return noframes;
} break;
@ -3026,7 +3059,7 @@ Node* TY_(ParseHTML)( TidyDocImpl *doc, Node *html, GetTokenMode mode )
memory.reentry_mode = mode;
memory.reentry_state = STATE_PARSE_FRAMESET_REENTER;
TY_(InsertNodeAtEnd)(html, node);
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseHTML at STATE_PARSE_FRAMESET, count: %d, depth %d\n", parser_count, --parser_depth));
return node;
} break;
@ -3081,7 +3114,7 @@ Node* TY_(ParseInline)( TidyDocImpl *doc, Node *element, GetTokenMode mode )
if ( element == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
element = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
mode = memory.reentry_mode;
@ -3651,7 +3684,7 @@ Node* TY_(ParseInline)( TidyDocImpl *doc, Node *element, GetTokenMode mode )
memory.reentry_node = node;
memory.mode = mode;
memory.reentry_mode = mode;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseInline 1 with a node to parse: %s\n", node->element));
return node;
}
@ -3693,7 +3726,7 @@ Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode
if ( list == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
list = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
DEBUG_LOG(SPRTF(">>>Re-Enter ParseList with %s\n", node->element));
@ -3859,7 +3892,7 @@ Node* TY_(ParseList)( TidyDocImpl* doc, Node *list, GetTokenMode ARG_UNUSED(mode
memory.original_node = list;
memory.reentry_node = node;
memory.mode = IgnoreWhitespace;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseList with a node to parse: %s\n", node->element));
return node;
}
@ -4041,11 +4074,11 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
*/
if ( noframes == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, because we replace it entering the loop anyway.*/
noframes = memory.original_node;
state = memory.reentry_state;
body_seen = memory.register_b_1;
body_seen = memory.register_1;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseNoFrames with %s\n", node->element));
}
else
@ -4123,11 +4156,11 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
memory.original_node = noframes;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_NODEISBODY;
memory.register_b_1 = lexer->seenEndBody;
memory.register_1 = lexer->seenEndBody;
memory.mode = IgnoreWhitespace;
TY_(InsertNodeAtEnd)(noframes, node);
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseNoFrames with a node to parse: %s\n", node->element));
return node;
}
@ -4168,7 +4201,7 @@ Node* TY_(ParseNoFrames)( TidyDocImpl* doc, Node *noframes, GetTokenMode mode )
memory.reentry_node = node;
memory.mode = IgnoreWhitespace; /*MixedContent*/
memory.reentry_state = STATE_INITIAL;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseNoFrames with a node to parse: %s\n", node->element));
return node;
}
@ -4220,7 +4253,7 @@ Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
if ( field == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
field = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
DEBUG_LOG(SPRTF(">>>Re-Enter ParseOptGroup with %s\n", node->element));
@ -4259,7 +4292,7 @@ Node* TY_(ParseOptGroup)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED
memory.identity = TY_(ParseOptGroup);
memory.original_node = field;
memory.reentry_node = node;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseOptGroup with a node to parse: %s\n", node->element));
return node;
}
@ -4293,7 +4326,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
if ( pre == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
pre = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
state = memory.reentry_state;
@ -4446,7 +4479,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
memory.original_node = pre;
memory.reentry_node = node;
memory.reentry_state = STATE_RENTRY_ACTION;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParsePre with a node to parse: %s\n", node->element));
return node;
}
@ -4488,7 +4521,7 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
memory.original_node = pre;
memory.reentry_node = node;
memory.reentry_state = STATE_INITIAL;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParsePre with a node to parse: %s\n", node->element));
return node;
}
@ -4548,11 +4581,11 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)
if ( row == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
row = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
state = memory.reentry_state;
exclude_state = memory.register_b_1;
exclude_state = memory.register_1;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseRow with %s\n", node->element));
}
else
@ -4692,8 +4725,8 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)
memory.original_node = row;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_NOT_ENDTAG;
memory.register_b_1 = exclude_state;
pushMemory( doc, memory );
memory.register_1 = exclude_state;
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseRow 1 with a node to parse: %s\n", node->element));
return node;
}
@ -4727,8 +4760,8 @@ Node* TY_(ParseRow)( TidyDocImpl* doc, Node *row, GetTokenMode ARG_UNUSED(mode)
memory.original_node = row;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_TD_TH;
memory.register_b_1 = exclude_state;
pushMemory( doc, memory );
memory.register_1 = exclude_state;
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseRow 2 with a node to parse: %s\n", node->element));
return node;
}
@ -4792,7 +4825,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU
if ( rowgroup == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
rowgroup = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overwrites anyway. */
state = memory.reentry_state;
@ -4887,7 +4920,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU
memory.original_node = rowgroup;
memory.reentry_node = node;
memory.reentry_state = STATE_POST_NOT_TEXTNODE;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseRowGroup 1 with a node to parse: %s\n", node->element));
return node;
}
@ -4973,7 +5006,7 @@ Node* TY_(ParseRowGroup)( TidyDocImpl* doc, Node *rowgroup, GetTokenMode ARG_UNU
memory.original_node = rowgroup;
memory.reentry_node = node;
memory.reentry_state = STATE_INITIAL;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseRowGroup 2 with a node to parse: %s\n", node->element));
return node;
} break;
@ -5067,7 +5100,7 @@ Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
if ( field == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
field = memory.original_node;
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
DEBUG_LOG(SPRTF(">>>Re-Enter ParseSelect with %s\n", node->element));
@ -5108,7 +5141,7 @@ Node* TY_(ParseSelect)( TidyDocImpl* doc, Node *field, GetTokenMode ARG_UNUSED(m
memory.reentry_node = node;
TY_(InsertNodeAtEnd)(field, node);
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseSelect with a node to parse: %s\n", node->element));
return node;
}
@ -5144,10 +5177,10 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED
if ( table == NULL )
{
TidyParserMemory memory = popMemory( doc );
TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
table = memory.original_node;
lexer->exiled = memory.register_b_1;
lexer->exiled = memory.register_1;
DEBUG_LOG(SPRTF(">>>Re-Enter ParseTableTag with %s\n", node->element));
}
else
@ -5219,9 +5252,9 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED
memory.identity = TY_(ParseTableTag);
memory.original_node = table;
memory.reentry_node = node;
memory.register_b_1 = no; /* later, lexer->exiled = no */
memory.register_1 = no; /* later, lexer->exiled = no */
memory.mode = IgnoreWhitespace;
pushMemory( doc, memory );
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseTableTag with a node to parse: %s\n", node->element));
return node;
}
@ -5292,8 +5325,8 @@ Node* TY_(ParseTableTag)( TidyDocImpl* doc, Node *table, GetTokenMode ARG_UNUSED
memory.identity = TY_(ParseTableTag);
memory.original_node = table;
memory.reentry_node = node;
memory.register_b_1 = lexer->exiled;
pushMemory( doc, memory );
memory.register_1 = lexer->exiled;
TY_(pushMemory)( doc, memory );
DEBUG_LOG(SPRTF("<<<Exiting ParseTableTag with a node to parse: %s\n", node->element));
return node;
}
@ -5457,6 +5490,116 @@ Node* TY_(ParseTitle)( TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mo
}
/** MARK: ParseXMLElement
 *  Parses the content of the given XML element without recursing.
 *
 *  When a nested start tag is found, the current state is pushed onto the
 *  parser memory stack and the child node is returned so that the caller can
 *  dispatch it. Calling this function again with `element == NULL` pops that
 *  state, appends the (now parsed) child to its parent and resumes the loop.
 *
 *  @param doc     The Tidy document being parsed.
 *  @param element The element whose content is to be parsed, or NULL to resume
 *                 the element stored in the most recently pushed record.
 *  @param mode    Token mode handed to the lexer. Replaced by the stored
 *                 `reentry_mode` on re-entry, and by `Preformatted` when the
 *                 element asks for whitespace preservation.
 *  @returns The start tag node that has to be parsed next, or NULL when the
 *           element is finished (or was discarded as unexpected text).
 */
static Node* ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
{
    Lexer* lexer = doc->lexer;
    Node *node;

    if ( element == NULL )
    {
        TidyParserMemory memory = TY_(popMemory)( doc );
        element = memory.original_node;
        node = memory.reentry_node; /* The child that was handed out earlier. */
        mode = memory.reentry_mode;
        TY_(InsertNodeAtEnd)(element, node); /* The only re-entry action needed. */
    }
    else
    {
        /* if node is pre or has xml:space="preserve" then do so */
        if ( TY_(XMLPreserveWhiteSpace)(doc, element) )
            mode = Preformatted;

        /* deal with comments etc. */
        InsertMisc( &doc->root, element);

        /* we shouldn't have plain text at this point. */
        if (TY_(nodeIsText)(element))
        {
            TY_(Report)(doc, &doc->root, element, DISCARDING_UNEXPECTED);
            TY_(FreeNode)( doc, element);
            return NULL;
        }
    }

    while ((node = TY_(GetToken)(doc, mode)) != NULL)
    {
        if (node->type == EndTag)
        {
            /* The matching end tag closes this element and ends the loop. */
            if (node->element && element->element &&
                TY_(tmbstrcmp)(node->element, element->element) == 0)
            {
                TY_(FreeNode)( doc, node);
                element->closed = yes;
                break;
            }

            /* Discard unexpected end tags. `element` is never NULL here (it
               has already been dereferenced on every path above), so there
               is only one report code to choose from.
            */
            TY_(Report)(doc, element, node, UNEXPECTED_ENDTAG_IN);
            TY_(FreeNode)( doc, node);
            continue;
        }

        /* parse content on seeing start tag */
        if (node->type == StartTag)
        {
            TidyParserMemory memory = {0};
            memory.identity = ParseXMLElement;
            memory.original_node = element;
            memory.reentry_node = node;
            memory.reentry_mode = mode;
            /* The recursive implementation passed its own mode down to the
               child, so a child of a whitespace-preserving element was also
               lexed as Preformatted. Publish the mode in the record so that a
               caller peeking at it can do the same.
               NOTE(review): the dispatcher is not visible from here; confirm
               that it reads this field when dispatching the returned node.
            */
            memory.mode = mode;
            TY_(pushMemory)( doc, memory );
            return node;
        }

        TY_(InsertNodeAtEnd)(element, node);
    } /* while */

    /*
     if first child is text then trim initial space and
     delete text node if it is empty.
    */
    node = element->content;

    if (TY_(nodeIsText)(node) && mode != Preformatted)
    {
        if ( lexer->lexbuf[node->start] == ' ' )
        {
            node->start++;

            if (node->start >= node->end)
                TY_(DiscardElement)( doc, node );
        }
    }

    /*
     if last child is text then trim final space and
     delete the text node if it is empty
    */
    node = element->last;

    if (TY_(nodeIsText)(node) && mode != Preformatted)
    {
        if ( lexer->lexbuf[node->end - 1] == ' ' )
        {
            node->end--;

            if (node->start >= node->end)
                TY_(DiscardElement)( doc, node );
        }
    }

    return NULL;
}
/***************************************************************************//*
** MARK: - Post-Parse Operations
***************************************************************************/
@ -6101,87 +6244,6 @@ void TY_(ParseDocument)(TidyDocImpl* doc)
}
/** MARK: ParseXMLElement
 *  Recursively parses the content of the given XML element: tokens are read
 *  until the matching end tag (or end of input), nested start tags are parsed
 *  by a recursive call, and every accepted token is appended to `element`.
 *  Afterwards a single leading and a single trailing space are trimmed from
 *  the first and last text children unless the element is preformatted.
 */
static void ParseXMLElement(TidyDocImpl* doc, Node *element, GetTokenMode mode)
{
    Lexer* lexer = doc->lexer;
    Node *token;
    Node *edge;

    /* Elements such as pre, or anything carrying xml:space="preserve",
       are lexed with whitespace kept intact. */
    if ( TY_(XMLPreserveWhiteSpace)(doc, element) )
        mode = Preformatted;

    for (;;)
    {
        token = TY_(GetToken)(doc, mode);
        if (token == NULL)
            break;

        if (token->type == EndTag)
        {
            int closes_element = token->element && element->element &&
                TY_(tmbstrcmp)(token->element, element->element) == 0;

            if (closes_element)
            {
                TY_(FreeNode)( doc, token);
                element->closed = yes;
                break;
            }

            /* Any other end tag is reported and thrown away. */
            TY_(Report)(doc, element, token,
                        element ? UNEXPECTED_ENDTAG_IN : UNEXPECTED_ENDTAG_ERR);
            TY_(FreeNode)( doc, token);
            continue;
        }

        /* A start tag gets its own content parsed before it is attached. */
        if (token->type == StartTag)
            ParseXMLElement( doc, token, mode );

        TY_(InsertNodeAtEnd)(element, token);
    }

    /* Leading space of a first text child: drop it, and drop the node too
       if nothing is left. */
    edge = element->content;
    if (TY_(nodeIsText)(edge) && mode != Preformatted
        && lexer->lexbuf[edge->start] == ' ')
    {
        edge->start++;
        if (edge->start >= edge->end)
            TY_(DiscardElement)( doc, edge );
    }

    /* Trailing space of a last text child: same treatment. */
    edge = element->last;
    if (TY_(nodeIsText)(edge) && mode != Preformatted
        && lexer->lexbuf[edge->end - 1] == ' ')
    {
        edge->end--;
        if (edge->start >= edge->end)
            TY_(DiscardElement)( doc, edge );
    }
}
/** MARK: TY_(ParseXMLDocument)
* Parses the document using Tidy's XML parser.
*/
@ -6232,7 +6294,7 @@ void TY_(ParseXMLDocument)(TidyDocImpl* doc)
if (node->type == StartTag)
{
TY_(InsertNodeAtEnd)( &doc->root, node );
ParseXMLElement( doc, node, IgnoreWhitespace );
ParseHTMLWithNode( doc, node );
continue;
}

View file

@ -55,8 +55,8 @@ typedef struct _TidyParserMemory
GetTokenMode reentry_mode; /**< The token mode to use when re-entering. */
int reentry_state; /**< State to set during re-entry. Defined locally in each parser. */
GetTokenMode mode; /**< The caller will peek at this value to get the correct mode. */
Bool register_b_1; /**< Local variable storage. */
Bool register_b_2; /**< Local variable storage. */
int register_1; /**< Local variable storage. */
int register_2; /**< Local variable storage. */
} TidyParserMemory;
@ -86,6 +86,44 @@ void TY_(InitParserStack)( TidyDocImpl* doc );
void TY_(FreeParserStack)( TidyDocImpl* doc );
/**
 *  Indicates whether or not the parser memory stack is empty.
 *  @param doc The Tidy document that owns the stack.
 *  @returns A Bool telling whether the stack is empty. Callers compare the
 *           result against `no` before popping a record.
 */
Bool TY_(isEmptyParserStack)( TidyDocImpl* doc );
/**
 *  Peek at the parser memory.
 *  @param doc The Tidy document that owns the stack.
 *  @returns A TidyParserMemory record, returned by value.
 *  NOTE(review): presumably this is the most recently pushed record and the
 *  stack is left untouched; the behaviour on an empty stack is not visible
 *  from this header -- confirm against the implementation.
 */
TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc );
/**
 *  Peek at the parser memory "identity" field. This is just a convenience
 *  to avoid having to create a new struct instance in the caller.
 *  @param doc The Tidy document that owns the stack.
 *  @returns The `identity` parser pointer of the peeked record.
 */
Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc );
/**
 *  Peek at the parser memory "mode" field. This is just a convenience
 *  to avoid having to create a new struct instance in the caller.
 *  @param doc The Tidy document that owns the stack.
 *  @returns The `mode` token mode of the peeked record.
 */
GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc );
/**
 *  Pop out a parser memory.
 *  @param doc The Tidy document that owns the stack.
 *  @returns The popped record, by value; parsers read its fields to restore
 *           their local state on re-entry.
 *  NOTE(review): behaviour on an empty stack is not visible from this
 *  header -- check TY_(isEmptyParserStack) first unless the caller knows a
 *  record was pushed.
 */
TidyParserMemory TY_(popMemory)( TidyDocImpl* doc );
/**
 *  Push the parser memory to the stack.
 *  @param doc  The Tidy document that owns the stack.
 *  @param data The record to store; it is passed by value.
 */
void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data );
/**
* Is used to perform a node integrity check recursively after parsing
* an HTML or XML document.

View file

@ -16,6 +16,7 @@
#include "entities.h"
#include "tmbstr.h"
#include "utf8.h"
#include "sprtf.h"
/* *** FOR DEBUG ONLY *** */
/* #define DEBUG_PPRINT */
@ -2330,102 +2331,152 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
}
}
/** Pretty prints an XML tree without recursion.
 *
 *  The tree is walked depth first. Whenever a container element with children
 *  is entered, a record holding the element, its next sibling, its "mixed
 *  content" flag and the indentation in effect is pushed onto the parser
 *  memory stack; the record is popped again to print the end tag once the
 *  children have been printed.
 *
 *  @param doc    The Tidy document being printed.
 *  @param mode   Print mode flags, passed through to the print helpers.
 *  @param indent Indentation, in columns, at which `node` is printed.
 *  @param node   First node to print; its following siblings are printed too.
 */
void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
{
    Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
    Node* next = NULL;

    while ( node )
    {
        next = node->next;

        if (doc->progressCallback)
        {
            doc->progressCallback( tidyImplToDoc(doc), node->line, node->column, doc->pprint.line + 1 );
        }

        if ( node->type == TextNode)
        {
            PPrintText( doc, mode, indent, node );
        }
        else if ( node->type == RootNode )
        {
            /* Descend into the root. A root without content yields NULL,
               which ends the loop; leaving `node` unchanged here would spin
               on the same root node forever.
            */
            node = node->content;
            continue;
        }
        else if ( node->type == CommentTag )
        {
            PCondFlushLineSmart( doc, indent );
            PPrintComment( doc, indent, node);
            /* PCondFlushLine( doc, 0 ); */
        }
        else if ( node->type == DocTypeTag )
            PPrintDocType( doc, indent, node );
        else if ( node->type == ProcInsTag )
            PPrintPI( doc, indent, node );
        else if ( node->type == XmlDecl )
            PPrintXmlDecl( doc, indent, node );
        else if ( node->type == CDATATag )
            PPrintCDATA( doc, indent, node );
        else if ( node->type == SectionTag )
            PPrintSection( doc, indent, node );
        else if ( node->type == AspTag )
            PPrintAsp( doc, indent, node );
        else if ( node->type == JsteTag)
            PPrintJste( doc, indent, node );
        else if ( node->type == PhpTag)
            PPrintPhp( doc, indent, node );
        else if ( TY_(nodeHasCM)(node, CM_EMPTY) ||
                  (node->type == StartEndTag && !xhtmlOut) )
        {
            PCondFlushLineSmart( doc, indent );
            PPrintTag( doc, mode, indent, node );
            /* TY_(PFlushLine)( doc, indent ); */
        }
        else /* some kind of container element */
        {
            uint spaces = cfg( doc, TidyIndentSpaces );
            Node *content;
            Bool mixed = no;
            uint tag_indent = indent; /* column of this element's own tags */
            uint cindent;             /* column of this element's children */

            for ( content = node->content; content; content = content->next )
            {
                if ( TY_(nodeIsText)(content) )
                {
                    mixed = yes;
                    break;
                }
            }

            PCondFlushLineSmart( doc, indent );

            if ( TY_(XMLPreserveWhiteSpace)(doc, node) )
            {
                /* Only this element and its children lose their indentation;
                   `indent` itself is kept for the siblings that follow. */
                tag_indent = 0;
                mixed = no;
                cindent = 0;
            }
            else if (mixed)
                cindent = indent;
            else
                cindent = indent + spaces;

            PPrintTag( doc, mode, tag_indent, node );

            if ( node->content )
            {
                TidyParserMemory memory = {0};

                if ( !mixed )
                    TY_(PFlushLineSmart)( doc, cindent );

                memory.original_node = node;
                memory.reentry_node = next;
                memory.register_1 = mixed;
                memory.register_2 = indent; /* restored for later siblings */
                TY_(pushMemory)(doc, memory);

                /* Prevent infinite indentation. Seriously, at what point is
                   anyone going to read a file with infinite indentation? It
                   slows down rendering for arbitrarily-deep test cases that
                   are only meant to crash Tidy in the first place. Let's
                   consider whether to remove this limitation, lower it,
                   increase it, or add a new configuration option to control
                   it, or even emit an info-level message about it.
                 */
                if (tag_indent < TIDY_INDENTATION_LIMIT * spaces)
                    indent = cindent;
                else
                    indent = tag_indent;

                node = node->content;
                continue;
            }

            /* No children: close the element right away. Deferring the end
               tag through the stack would print it after the following
               siblings and then print the first of those siblings again.
            */
            PPrintEndTag( doc, mode, tag_indent, node );
            /* PCondFlushLine( doc, indent ); */
        }

        if (next)
        {
            node = next;
            continue;
        }

        /* No sibling is left at this level, so close the enclosing elements.
           The reentry_node of a record is NULL when the closed element was
           itself the last child, hence the loop keeps popping (and closing
           end tags) until a node to continue with turns up or the stack
           runs out, which ends the outer loop as well.
         */
        node = NULL;
        while ( node == NULL && TY_(isEmptyParserStack)(doc) == no )
        {
            TidyParserMemory memory = TY_(popMemory)(doc);
            Node* close_node = memory.original_node;
            Bool mixed = memory.register_1;
            uint tag_indent;

            indent = memory.register_2;
            tag_indent = TY_(XMLPreserveWhiteSpace)(doc, close_node) ? 0 : indent;

            if ( !mixed && close_node->content )
                PCondFlushLineSmart( doc, tag_indent );

            PPrintEndTag( doc, mode, tag_indent, close_node );
            /* PCondFlushLine( doc, indent ); */

            node = memory.reentry_node;
        }
    } /* while */
}
/*
* local variables:
* mode: c

View file

@ -2048,16 +2048,24 @@ void dbg_show_node( TidyDocImpl* doc, Node *node, int caller, int indent )
SPRTF("\n");
}
/* Shows `node` and each of its following siblings, every one followed by
   its own descendants. Siblings are handled by the loop; only descending
   into children recurses, so the recursion depth equals the tree depth.
   The first node must be shown before advancing to `node->next`, otherwise
   the first child of every parent (and its whole subtree) is skipped.
*/
void dbg_show_all_nodes_loop( TidyDocImpl* doc, Node *node, int indent )
{
    while ( node )
    {
        dbg_show_node( doc, node, 0, indent );
        dbg_show_all_nodes_loop( doc, node->content, indent + 1 );
        node = node->next;
    }
}

/* Shows `node` itself and then its entire subtree, indenting each level
   one step further. A NULL node shows nothing.
*/
void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent )
{
    if ( node == NULL )
        return;

    dbg_show_node( doc, node, 0, indent );
    dbg_show_all_nodes_loop( doc, node->content, indent + 1 );
}
#endif
int tidyDocCleanAndRepair( TidyDocImpl* doc )