diff --git a/src/lexer.h b/src/lexer.h index 9d49898..78b1940 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -157,7 +157,8 @@ typedef enum #define CM_NO_INDENT (1 << 18) /**< Elements whose content needs to be indented only if containing one CM_BLOCK element. */ #define CM_OBSOLETE (1 << 19) /**< Elements that are obsolete (such as "dir", "menu"). */ #define CM_NEW (1 << 20) /**< User defined elements. Used to determine how attributes without value should be printed. */ -#define CM_OMITST (1 << 21) /**< Elements that cannot be omitted. */ +#define CM_OMITST (1 << 21) /**< Elements that cannot be omitted. */ +#define CM_VOID (1 << 22) /**< Elements that are void per https://www.w3.org/TR/2011/WD-html-markup-20110113/syntax.html#syntax-elements. */ /** @@ -649,7 +650,7 @@ TY_PRIVATE int TY_(InlineDup)( TidyDocImpl* doc, Node *node ); /** - * Fefer duplicates when entering a table or other + * Defer duplicates when entering a table or other * element where the inlines shouldn't be duplicated. */ TY_PRIVATE void TY_(DeferDup)( TidyDocImpl* doc ); diff --git a/src/parser.c b/src/parser.c index 14cad8d..8fc125d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1156,7 +1156,7 @@ Node* TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) { /* Issue #212 - Further fix in case ParseBlock() is called with 'IgnoreWhitespace' when such a leading space may need to be inserted before this element to - preverve the browser view */ + preserve the browser view */ mode = MixedContent; } } /* Re-Entering */ @@ -5873,7 +5873,7 @@ Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node) /** MARK: TY_(InsertNodeAtStart) - * Insert node into markup tree as the firt element of content of element. + * Insert node into markup tree as the first element of content of element. 
*/ void TY_(InsertNodeAtStart)(Node *element, Node *node) { @@ -6311,14 +6311,3 @@ void TY_(ParseXMLDocument)(TidyDocImpl* doc) if ( cfgBool(doc, TidyXmlDecl) ) TY_(FixXmlDecl)( doc ); } - - - -/* - * local variables: - * mode: c - * indent-tabs-mode: nil - * c-basic-offset: 4 - * eval: (c-set-offset 'substatement-open 0) - * end: - */ diff --git a/src/pprint.c b/src/pprint.c index e123c34..4917c06 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -18,105 +18,101 @@ #include "utf8.h" #include "sprtf.h" -/* *** FOR DEBUG ONLY *** */ -/* #define DEBUG_PPRINT */ -/* #define DEBUG_INDENT */ + +/****************************************************************************//* + ** MARK: - Debug Output + ** For debug only: + ** #define DEBUG_PPRINT + ** #define DEBUG_INDENT + ***************************************************************************/ + + #if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_PPRINT) extern void dbg_show_node( TidyDocImpl* doc, Node *node, int caller, int indent ); #endif -/* - Block-level and unknown elements are printed on - new lines and their contents indented 2 spaces - Inline elements are printed inline. 
+/****************************************************************************//* + ** MARK: - Forward Declarations + ***************************************************************************/ - Inline content is wrapped on spaces (except in - attribute values or preformatted text, after - start tags and before end tags -*/ static void PPrintAsp( TidyDocImpl* doc, uint indent, Node* node ); -static void PPrintJste( TidyDocImpl* doc, uint indent, Node* node ); static void PPrintPhp( TidyDocImpl* doc, uint indent, Node* node ); -static int TextEndsWithNewline( Lexer *lexer, Node *node, uint mode ); -static int TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode ); -static Bool InsideHead( TidyDocImpl* doc, Node *node ); -static Bool ShouldIndent( TidyDocImpl* doc, Node *node ); -/*\ - * Issue #228 20150715 - macros to access --vertical-space tri state configuration parameter -\*/ + +/****************************************************************************//* + ** MARK: - Vertical-Space Tri-State Configuration accessor + ** Issue #228 20150715 - macros to access --vertical-space tri state + ** configuration parameter. + ***************************************************************************/ + + #define TidyClassicVS ((cfgAutoBool( doc, TidyVertSpace ) == TidyYesState) ? yes : no) #define TidyAddVS ((cfgAutoBool( doc, TidyVertSpace ) == TidyAutoState) ? 
no : yes ) -/*\ - * 20150515 - support using tabs instead of spaces - Issue #108 - * GH: https://github.com/htacg/tidy-html5/issues/108 - Keep indent with tabs #108 - * SF: https://sourceforge.net/p/tidy/feature-requests/3/ - #3 tabs in place of spaces -\*/ -static uint indent_char = ' '; -void TY_(PPrintTabs)(void) -{ - indent_char = '\t'; -} -void TY_(PPrintSpaces)(void) -{ - indent_char = ' '; -} -/* #431953 - start RJ Wraplen adjusted for smooth international ride */ +/****************************************************************************//* + ** MARK: - Wrapping Support + ***************************************************************************/ + +/** + * This typedef establishes the Unicode categories that individual characters + * will belong to. These are used to help ensure that TidyWraplen wraps at + * an appropriate place. + */ typedef enum { - UC00, /* None */ - UCPC, /* Punctuation, Connector */ - UCPD, /* Punctuation, Dash */ - UCPE, /* Punctuation, Close */ - UCPS, /* Punctuation, Open */ - UCPI, /* Punctuation, Initial quote */ - UCPF, /* Punctuation, Final quote */ - UCPO, /* Punctuation, Other */ - UCZS, /* Separator, Space */ - UCZL, /* Separator, Line */ - UCZP /* Separator, Paragraph */ + UC00, /**< None */ + UCPC, /**< Punctuation, Connector */ + UCPD, /**< Punctuation, Dash */ + UCPE, /**< Punctuation, Close */ + UCPS, /**< Punctuation, Open */ + UCPI, /**< Punctuation, Initial quote */ + UCPF, /**< Punctuation, Final quote */ + UCPO, /**< Punctuation, Other */ + UCZS, /**< Separator, Space */ + UCZL, /**< Separator, Line */ + UCZP /**< Separator, Paragraph */ } UnicodeCategory; -/* - From the original code, the following characters are removed: - U+2011 (non-breaking hyphen) - U+202F (narrow non-break space) - U+2044 (fraction slash) - U+200B (zero width space) - ...... 
(bidi formatting control characters) - - U+2011 and U+202F are non-breaking, U+2044 is a Sm character, - U+200B is a non-visible space, wrapping after it would make - this space visible, bidi should be done using HTML features - and the characters are neither Px or Zx. - - The following Unicode 3.0 punctuation characters are added: - - U+2048 (question exclamation mark) - U+2049 (exclamation question mark) - U+204A (tironian sign et) - U+204B (reversed pilcrow sign) - U+204C (black leftwards bullet) - U+204D (black rightwards bullet) - U+3030 (wavy dash) - U+30FB (katakana middle dot) - U+FE63 (small hyphen-minus) - U+FE68 (small reverse solidus) - U+FF3F (fullwidth low line) - U+FF5B (fullwidth left curly bracket) - U+FF5D (fullwidth right curly bracket) - - Other additional characters were not included in Unicode 3.0. - The table is based on Unicode 4.0. It must include only those - characters marking a wrapping point, "before" if the general - category is UCPS or UCPI, otherwise "after". -*/ +/** + * From the original code, the following characters are removed: + * + * U+2011 (non-breaking hyphen) + * U+202F (narrow non-break space) + * U+2044 (fraction slash) + * U+200B (zero width space) + * ...... (bidi formatting control characters) + * + * U+2011 and U+202F are non-breaking, U+2044 is a Sm character, + * U+200B is a non-visible space, wrapping after it would make + * this space visible, bidi should be done using HTML features + * and the characters are neither Px nor Zx. 
+ * + * The following Unicode 3.0 punctuation characters are added: + * + * U+2048 (question exclamation mark) + * U+2049 (exclamation question mark) + * U+204A (tironian sign et) + * U+204B (reversed pilcrow sign) + * U+204C (black leftwards bullet) + * U+204D (black rightwards bullet) + * U+3030 (wavy dash) + * U+30FB (katakana middle dot) + * U+FE63 (small hyphen-minus) + * U+FE68 (small reverse solidus) + * U+FF3F (fullwidth low line) + * U+FF5B (fullwidth left curly bracket) + * U+FF5D (fullwidth right curly bracket) + * + * Other additional characters were not included in Unicode 3.0. + * The table is based on Unicode 4.0. It must include only those + * characters marking a wrapping point, "before" if the general + * category is UCPS or UCPI, otherwise "after". + */ static struct _unicode4cat { unsigned long code; @@ -190,37 +186,46 @@ static struct _unicode4cat { 0x0000, UC00 } }; + +/** + * The values in this enum are used to indicate the wrapping point relative + * to a specific character. + */ typedef enum { - NoWrapPoint, - WrapBefore, - WrapAfter + NoWrapPoint, /**< Not a wrapping point. */ + WrapBefore, /**< Wrap before this character. */ + WrapAfter /**< Wrap after this character. */ } WrapPoint; -/* - If long lines of text have no white space as defined in HTML 4 - (U+0009, U+000A, U+000D, U+000C, U+0020) other characters could - be used to determine a wrap point. Since user agents would - normalize the inserted newline character to a space character, - this wrapping behaviour would insert visual whitespace into the - document. - Characters of the General Category Pi and Ps in the Unicode - character database (opening punctuation and initial quote - characters) mark a wrapping point before the character, other - punctuation characters (Pc, Pd, Pe, Pf, and Po), breakable - space characters (Zs), and paragraph and line separators - (Zl, Zp) mark a wrap point after the character. Using this - function Tidy can for example pretty print - -
....................“...quote...”...
- as -....................\n“...quote...”...
- or -....................“...quote...”\n...
- - if the next normal wrapping point would exceed the user - chosen wrapping column. +/** + * Given a character, indicate whether a wrap point exists before + * the character, after the character, or not at all. + * + * If long lines of text have no white space as defined in HTML 4 + * (U+0009, U+000A, U+000D, U+000C, U+0020) other characters could + * be used to determine a wrap point. Since user agents would + * normalize the inserted newline character to a space character, + * this wrapping behaviour would insert visual whitespace into the + * document. + * + * Characters of the General Category Pi and Ps in the Unicode + * character database (opening punctuation and initial quote + * characters) mark a wrapping point before the character, other + * punctuation characters (Pc, Pd, Pe, Pf, and Po), breakable + * space characters (Zs), and paragraph and line separators + * (Zl, Zp) mark a wrap point after the character. Using this + * function Tidy can for example pretty print + * + *....................“...quote...”...
+ * as + *....................\n“...quote...”...
+ * or + *....................“...quote...”\n...
+ * + * if the next normal wrapping point would exceed the user + * chosen wrapping column. */ static WrapPoint CharacterWrapPoint(tchar c) { @@ -240,6 +245,11 @@ static WrapPoint CharacterWrapPoint(tchar c) return NoWrapPoint; } + +/** + * Given a character in Big5 encoding, indicate whether a wrap point + * exists before the character, after the character, or not at all. + */ static WrapPoint Big5WrapPoint(tchar c) { if ((c & 0xFF00) == 0xA100) @@ -253,6 +263,14 @@ static WrapPoint Big5WrapPoint(tchar c) } +/****************************************************************************//* + ** MARK: - Print Buffer Allocation and Deallocation + ***************************************************************************/ + + +/** + * Initializes an instance of TidyIndent to default values. + */ static void InitIndent( TidyIndent* ind ) { ind->spaces = -1; @@ -260,6 +278,10 @@ static void InitIndent( TidyIndent* ind ) ind->attrStringStart = -1; } + +/** + * Initializes the tidy document's instance of the pretty print buffer. + */ void TY_(InitPrintBuf)( TidyDocImpl* doc ) { TidyClearMemory( &doc->pprint, sizeof(TidyPrintImpl) ); @@ -269,12 +291,25 @@ void TY_(InitPrintBuf)( TidyDocImpl* doc ) doc->pprint.line = 0; } + +/** + * Frees the tidy document's pretty print buffer. + */ void TY_(FreePrintBuf)( TidyDocImpl* doc ) { TidyDocFree( doc, doc->pprint.linebuf ); TY_(InitPrintBuf)( doc ); } + + +/****************************************************************************//* + ** MARK: - Buffer Utilities + ***************************************************************************/ + + +/** + * Expand the size of the pretty print buffer. 
+ */ static void expand( TidyPrintImpl* pprint, uint len ) { uint* ip; @@ -295,28 +330,64 @@ static void expand( TidyPrintImpl* pprint, uint len ) } } + +/****************************************************************************//* + ** MARK: - Indentation and Wrapping Utilities + ***************************************************************************/ + + +/** + * Returns the indent level of the current line. + */ static uint GetSpaces( TidyPrintImpl* pprint ) { int spaces = pprint->indent[ 0 ].spaces; return ( spaces < 0 ? 0U : (uint) spaces ); } + + +/** + * Clears the in-string flag. The pretty printer needs to know + * whether the current output position is within an attribute's + * string value in order to make word wrapping decisions. + */ static int ClearInString( TidyPrintImpl* pprint ) { TidyIndent *ind = pprint->indent + pprint->ixInd; return ind->attrStringStart = -1; } + + +/** + * Toggles the in-string flag. The pretty printer needs to know + * whether the current output position is within an attribute's + * string value in order to make word wrapping decisions. + */ static int ToggleInString( TidyPrintImpl* pprint ) { TidyIndent *ind = pprint->indent + pprint->ixInd; Bool inString = ( ind->attrStringStart >= 0 ); return ind->attrStringStart = ( inString ? -1 : (int) pprint->linelen ); } + + +/** + * Returns whether or not the current output position is in an + * attribute's string value. This is used to make word wrapping + * decisions. + */ static Bool IsInString( TidyPrintImpl* pprint ) { TidyIndent *ind = pprint->indent + 0; /* Always 1st */ return ( ind->attrStringStart >= 0 && ind->attrStringStart < (int) pprint->linelen ); } + + +/** + * Indicates whether or not the current designated word wrap + * position is within an attribute's string. 
+ */ static Bool IsWrapInString( TidyPrintImpl* pprint ) { TidyIndent *ind = pprint->indent + 0; /* Always 1st */ @@ -325,30 +396,36 @@ static Bool IsWrapInString( TidyPrintImpl* pprint ) (ind->attrStringStart > 0 && ind->attrStringStart < wrap) ); } -static Bool HasMixedContent (Node *element) -{ - Node * node; - - if (!element) - return no; - - for (node = element->content; node; node = node->next) - if ( TY_(nodeIsText)(node) ) - return yes; - - return no; -} +/** + * Clears the in-attribute flag. The pretty printer needs to know + * whether the current output position is within an attribute's + * string value in order to make word wrapping decisions. + */ static void ClearInAttrVal( TidyPrintImpl* pprint ) { TidyIndent *ind = pprint->indent + pprint->ixInd; ind->attrValStart = -1; } + + +/** + * Set the in-attribute flag and returns the attribute start + * position. The pretty printer needs to know whether the current + * output position is within an attribute's string value in order + * to make word wrapping decisions. + */ static int SetInAttrVal( TidyPrintImpl* pprint ) { TidyIndent *ind = pprint->indent + pprint->ixInd; return ind->attrValStart = (int) pprint->linelen; } + + +/** + * Indicates whether or not the current designated word wrap + * position is within an attribute. 
+ */ static Bool IsWrapInAttrVal( TidyPrintImpl* pprint ) { TidyIndent *ind = pprint->indent + 0; /* Always 1st */ @@ -357,92 +434,10 @@ static Bool IsWrapInAttrVal( TidyPrintImpl* pprint ) (ind->attrValStart > 0 && ind->attrValStart < wrap) ); } -static Bool WantIndent( TidyDocImpl* doc ) -{ - TidyPrintImpl* pprint = &doc->pprint; - Bool wantIt = GetSpaces(pprint) > 0; - if ( wantIt ) - { - Bool indentAttrs = cfgBool( doc, TidyIndentAttributes ); - wantIt = ( ( !IsWrapInAttrVal(pprint) || indentAttrs ) && - !IsWrapInString(pprint) ); - } - return wantIt; -} - - -static uint WrapOff( TidyDocImpl* doc ) -{ - uint saveWrap = cfg( doc, TidyWrapLen ); - TY_(SetOptionInt)( doc, TidyWrapLen, 0xFFFFFFFF ); /* very large number */ - return saveWrap; -} - -static void WrapOn( TidyDocImpl* doc, uint saveWrap ) -{ - TY_(SetOptionInt)( doc, TidyWrapLen, saveWrap ); -} - -static uint WrapOffCond( TidyDocImpl* doc, Bool onoff ) -{ - if ( onoff ) - return WrapOff( doc ); - return cfg( doc, TidyWrapLen ); -} - - -static void AddC( TidyPrintImpl* pprint, uint c, uint string_index) -{ - if ( string_index + 1 >= pprint->lbufsize ) - expand( pprint, string_index + 1 ); - pprint->linebuf[string_index] = c; -} - -static uint AddChar( TidyPrintImpl* pprint, uint c ) -{ - AddC( pprint, c, pprint->linelen ); - return ++pprint->linelen; -} - -static uint AddAsciiString( TidyPrintImpl* pprint, ctmbstr str, uint string_index ) -{ - uint ix, len = TY_(tmbstrlen)( str ); - if ( string_index + len >= pprint->lbufsize ) - expand( pprint, string_index + len ); - - for ( ix=0; ix- xy
- will display properly. Whereas -- x
won't. -*/ +/** + * Supports AfterSpace(). + */ static Bool AfterSpaceImp(Lexer *lexer, Node *node, Bool isEmpty) { Node *prev; @@ -1335,77 +1709,64 @@ static Bool AfterSpaceImp(Lexer *lexer, Node *node, Bool isEmpty) return AfterSpaceImp(lexer, node->parent, isEmpty); } + +/** + * Indicates whether the given node immediately follows certain things. + * Line can be wrapped immediately after inline start tag provided + * it follows a text node ending in a space, or it follows a+ * xy
+ * will display properly. Whereas + *+ * x
won't. + */ static Bool AfterSpace(Lexer *lexer, Node *node) { return AfterSpaceImp(lexer, node, TY_(nodeCMIsEmpty)(node)); } + +/** + * Pretty prints a node's end tag. + */ static void PPrintEndTag( TidyDocImpl* doc, uint ARG_UNUSED(mode), - uint ARG_UNUSED(indent), Node *node ); - -/*\ - * See Issue #162 - void elements also get a closing tag, like img, br, ... - * - * from : http://www.w3.org/TR/html-markup/syntax.html#syntax-elements - * A complete list of the void elements in HTML: - * area, base, br, col, command, embed, hr, img, input, keygen, link, meta, param, source, track, wbr - * - * This could be sped up by NOT using the macro nodeIsXXXX, since this repeatedly checks the node, - * and then the node->tag, which here are checked at the beginning... - * - * Some have already been done... at least where no macro yet exists. - * - * And maybe a switch(id) case would be faster. -\*/ - -static Bool TY_(isVoidElement)( Node *node ) + uint ARG_UNUSED(indent), Node *node ) { - TidyTagId id; - if ( !node ) - return no; - if ( !node->tag ) - return no; - id = node->tag->id; - if (nodeIsAREA(node)) - return yes; - if (nodeIsBASE(node)) - return yes; - if (nodeIsBR(node)) - return yes; - if (nodeIsCOL(node)) - return yes; - /* if (nodeIsCOMMAND(node)) */ - if (id == TidyTag_COMMAND) - return yes; - if (nodeIsEMBED(node)) - return yes; - if (nodeIsHR(node)) - return yes; - if (nodeIsIMG(node)) - return yes; - if (nodeIsINPUT(node)) - return yes; - /* if (nodeIsKEYGEN(node)) */ - if (id == TidyTag_KEYGEN ) - return yes; - if (nodeIsLINK(node)) - return yes; - if (nodeIsMETA(node)) - return yes; - if (nodeIsPARAM(node)) - return yes; - /* if (nodeIsSOURCE(node)) */ - if (id == TidyTag_SOURCE ) - return yes; - /* if (nodeIsTRACK(node)) */ - if (id == TidyTag_TRACK ) - return yes; - if (nodeIsWBR(node)) - return yes; + TidyPrintImpl* pprint = &doc->pprint; + Bool uc = cfgBool( doc, TidyUpperCaseTags ); + tmbstr s = node->element; + tchar c; - return no; + AddString( 
pprint, "" ); + + if (s) + { + while (*s) + { + c = (unsigned char)*s; + + if (c > 0x7F) + s += TY_(GetUTF8)(s, &c); + else if (uc) + c = TY_(ToUpper)(c); + + AddChar(pprint, c); + ++s; + } + } + + AddChar( pprint, '>' ); } + +/** + * Prints the tag for the given node. + */ static void PPrintTag( TidyDocImpl* doc, uint mode, uint indent, Node *node ) { @@ -1452,10 +1813,11 @@ static void PPrintTag( TidyDocImpl* doc, * Appears this was added for Issue #111, #112, #113, but will now add an end tag * for elements like which do NOT have an EndTag, even in html5 * See Issue #162 - void elements also get a closing tag, like img, br, ... - * A complete list of the void elements in HTML: + * A complete list of the void elements in HTML. * area, base, br, col, command, embed, hr, img, input, keygen, link, meta, param, source, track, wbr + * A new CM_VOID was added to tag_defs[] to account for these. \*/ - if ((node->type == StartEndTag && TY_(HTMLVersion)(doc) == HT50) && !TY_(isVoidElement)(node) ) + if ((node->type == StartEndTag && TY_(HTMLVersion)(doc) == HT50) && !TY_(nodeHasCM)(node, CM_VOID ) ) { PPrintEndTag( doc, mode, indent, node ); } @@ -1489,35 +1851,10 @@ static void PPrintTag( TidyDocImpl* doc, } } -static void PPrintEndTag( TidyDocImpl* doc, uint ARG_UNUSED(mode), - uint ARG_UNUSED(indent), Node *node ) -{ - TidyPrintImpl* pprint = &doc->pprint; - Bool uc = cfgBool( doc, TidyUpperCaseTags ); - tmbstr s = node->element; - tchar c; - - AddString( pprint, "" ); - - if (s) - { - while (*s) - { - c = (unsigned char)*s; - - if (c > 0x7F) - s += TY_(GetUTF8)(s, &c); - else if (uc) - c = TY_(ToUpper)(c); - - AddChar(pprint, c); - ++s; - } - } - - AddChar( pprint, '>' ); -} +/** + * Pretty Prints a comment of a node. 
+ */ static void PPrintComment( TidyDocImpl* doc, uint indent, Node* node ) { TidyPrintImpl* pprint = &doc->pprint; @@ -1530,9 +1867,13 @@ static void PPrintComment( TidyDocImpl* doc, uint indent, Node* node ) AddString(pprint, "--"); AddChar( pprint, '>' ); if ( node->linebreak && node->next ) - TY_(PFlushLineSmart)( doc, indent ); + PFlushLineSmart( doc, indent ); } + +/** + * Pretty Prints a doctype given a node. + */ static void PPrintDocType( TidyDocImpl* doc, uint indent, Node *node ) { TidyPrintImpl* pprint = &doc->pprint; @@ -1596,6 +1937,10 @@ static void PPrintDocType( TidyDocImpl* doc, uint indent, Node *node ) PCondFlushLineSmart( doc, indent ); } + +/** + * Pretty Print processing instructions. + */ static void PPrintPI( TidyDocImpl* doc, uint indent, Node *node ) { TidyPrintImpl* pprint = &doc->pprint; @@ -1627,6 +1972,10 @@ static void PPrintPI( TidyDocImpl* doc, uint indent, Node *node ) PCondFlushLine( doc, indent ); } + +/** + * Pretty Print an XML declaration. + */ static void PPrintXmlDecl( TidyDocImpl* doc, uint indent, Node *node ) { AttVal* att; @@ -1658,10 +2007,14 @@ static void PPrintXmlDecl( TidyDocImpl* doc, uint indent, Node *node ) AddChar( pprint, '?' ); AddChar( pprint, '>' ); WrapOn( doc, saveWrap ); - TY_(PFlushLineSmart)( doc, indent ); + PFlushLineSmart( doc, indent ); } -/* note ASP and JSTE share <% ... %> syntax */ + +/** + * Pretty Print ASP tags. + * @note ASP and JSTE share <% ... %> syntax + */ static void PPrintAsp( TidyDocImpl* doc, uint indent, Node *node ) { TidyPrintImpl* pprint = &doc->pprint; @@ -1677,7 +2030,11 @@ static void PPrintAsp( TidyDocImpl* doc, uint indent, Node *node ) WrapOn( doc, saveWrap ); } -/* JSTE also supports <# ... #> syntax */ + +/** + * Pretty Print JSTE tags. + * @note JSTE also supports <# ... #> 
syntax + */ static void PPrintJste( TidyDocImpl* doc, uint indent, Node *node ) { TidyPrintImpl* pprint = &doc->pprint; @@ -1693,7 +2050,11 @@ static void PPrintJste( TidyDocImpl* doc, uint indent, Node *node ) WrapOn( doc, saveWrap ); } -/* PHP is based on XML processing instructions */ + +/** + * Pretty Print PHP tags. + * @note PHP is based on XML processing instructions + */ static void PPrintPhp( TidyDocImpl* doc, uint indent, Node *node ) { TidyPrintImpl* pprint = &doc->pprint; @@ -1711,6 +2072,10 @@ static void PPrintPhp( TidyDocImpl* doc, uint indent, Node *node ) /* WrapOn( doc, saveWrap ); */ } + +/** + * Pretty Print CDATA. + */ static void PPrintCDATA( TidyDocImpl* doc, uint indent, Node *node ) { uint saveWrap; @@ -1730,6 +2095,10 @@ static void PPrintCDATA( TidyDocImpl* doc, uint indent, Node *node ) WrapOn( doc, saveWrap ); /* restore wrapping */ } + +/** + * Pretty Print the section tag. + */ static void PPrintSection( TidyDocImpl* doc, uint indent, Node *node ) { TidyPrintImpl* pprint = &doc->pprint; @@ -1746,108 +2115,10 @@ static void PPrintSection( TidyDocImpl* doc, uint indent, Node *node ) } -static ctmbstr CDATA_START = ""; -static ctmbstr JS_COMMENT_START = "//"; -static ctmbstr JS_COMMENT_END = ""; -static ctmbstr VB_COMMENT_START = "\'"; -static ctmbstr VB_COMMENT_END = ""; -static ctmbstr CSS_COMMENT_START = "/*"; -static ctmbstr CSS_COMMENT_END = "*/"; -static ctmbstr DEFAULT_COMMENT_START = ""; -static ctmbstr DEFAULT_COMMENT_END = ""; - -static Bool InsideHead( TidyDocImpl* doc, Node *node ) -{ - if ( nodeIsHEAD(node) ) - return yes; - - if ( node->parent != NULL ) - return InsideHead( doc, node->parent ); - - return no; -} - -/* Is text node and already ends w/ a newline? - - Used to pretty print CDATA/PRE text content. - If it already ends on a newline, it is not - necessary to print another before printing end tag. 
-*/ -static int TextEndsWithNewline(Lexer *lexer, Node *node, uint mode ) -{ - if ( (mode & (CDATA|COMMENT)) && TY_(nodeIsText)(node) && node->end > node->start ) - { - uint ch, ix = node->end - 1; - /*\ - * Skip non-newline whitespace. - * Issue #379 - Only if ix is GT start can it be decremented! - \*/ - while ( ix > node->start && (ch = (lexer->lexbuf[ix] & 0xff)) - && ( ch == ' ' || ch == '\t' || ch == '\r' ) ) - --ix; - - if ( lexer->lexbuf[ ix ] == '\n' ) - return node->end - ix - 1; /* #543262 tidy eats all memory */ - } - return -1; -} - -/*\ - * Issue #133 - creeping indent - a very OLD bug - 2nd tidy run increases the indent! - * If the node is text, then remove any white space equal to the indent, - * but this also applies to the AspTag, which is text like... - * And may apply to other text like nodes as well. - * - * Here the total white space is returned, and then a sister service, IncrWS() - * will advance the start of the lexer output by the amount of the indent. -\*/ -static Bool TY_(nodeIsTextLike)( Node *node ) -{ - if ( TY_(nodeIsText)(node) ) - return yes; - if ( node->type == AspTag ) - return yes; - if (node->type == PhpTag) - return yes; /* Issue #392 */ - /* add other text like nodes... */ - return no; -} - -static int TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode ) -{ - assert( node != NULL ); - if ( (mode & (CDATA|COMMENT)) && TY_(nodeIsTextLike)(node) && node->end > node->start && start >= node->start ) - { - uint ch, ix = start; - /* Skip whitespace. */ - while ( ix < node->end && (ch = (lexer->lexbuf[ix] & 0xff)) - && ( ch==' ' || ch=='\t' || ch=='\r' ) ) - ++ix; - - if ( ix > start ) - return ix - start; - } - return -1; -} - -static Bool HasCDATA( Lexer* lexer, Node* node ) -{ - /* Scan forward through the textarray. Since the characters we're - ** looking for are < 0x7f, we don't have to do any UTF-8 decoding. 
- */ - ctmbstr start = lexer->lexbuf + node->start; - int len = node->end - node->start + 1; - - if ( node->type != TextNode ) - return no; - - return ( NULL != TY_(tmbsubstrn)( start, len, CDATA_START )); -} - - -static -void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node ) +/** + * @todo + */ +static void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node ) { TidyPrintImpl* pprint = &doc->pprint; Node* content; @@ -1858,7 +2129,7 @@ void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node ) Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); if ( InsideHead(doc, node) ) - TY_(PFlushLineSmart)( doc, indent ); + PFlushLineSmart( doc, indent ); PCondFlushLineSmart( doc, indent ); /* Issue #56 - long outstanding bug - flush any existing closing tag */ @@ -1868,7 +2139,7 @@ void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node ) In this case we don't want to flush the line, preferring to keep the required closing SCRIPT tag on the same line. */ if ( node->content != NULL ) - TY_(PFlushLineSmart)(doc, indent); + PFlushLineSmart(doc, indent); if ( xhtmlOut && node->content != NULL ) { @@ -1914,7 +2185,7 @@ void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node ) be one child and the only caller of this function defines all these modes already... 
*/ - TY_(PPrintTree)( doc, (mode | PREFORMATTED | NOWRAP | CDATA), + TY_(PPrintTree)( doc, (mode | PREFORMATTED | NOWRAP | CDATA), indent, content ); if ( content == node->last ) @@ -1948,85 +2219,22 @@ void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node ) { #if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_INDENT) SPRTF("%s Indent from %d to %d\n", __FUNCTION__, pprint->indent[ 0 ].spaces, indent ); -#endif +#endif pprint->indent[ 0 ].spaces = indent; } PPrintEndTag( doc, mode, indent, node ); if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState && node->next != NULL && !( TY_(nodeHasCM)(node, CM_INLINE) || TY_(nodeIsText)(node) ) ) - TY_(PFlushLineSmart)( doc, indent ); + PFlushLineSmart( doc, indent ); } - -static Bool ShouldIndent( TidyDocImpl* doc, Node *node ) -{ - TidyTriState indentContent = cfgAutoBool( doc, TidyIndentContent ); - if ( indentContent == TidyNoState ) - return no; - - if ( nodeIsTEXTAREA(node) ) - return no; - - if ( indentContent == TidyAutoState ) - { - if ( node->content && TY_(nodeHasCM)(node, CM_NO_INDENT) ) - { - for ( node = node->content; node; node = node->next ) - if ( TY_(nodeHasCM)(node, CM_BLOCK) ) - return yes; - return no; - } - - if ( TY_(nodeHasCM)(node, CM_HEADING) ) - return no; - - if ( nodeIsHTML(node) ) - return no; - - if ( nodeIsP(node) ) - return no; - - if ( nodeIsTITLE(node) ) - return no; - - /* http://tidy.sf.net/issue/1610888 - Indenting produces spurious lines with IE 6.x */ - if ( nodeIsDIV(node) && node->last && nodeIsIMG(node->last) ) - return no; - } - - if ( TY_(nodeHasCM)(node, CM_FIELD | CM_OBJECT) ) - return yes; - - if ( nodeIsMAP(node) ) - return yes; - - return ( !TY_(nodeHasCM)( node, CM_INLINE ) && node->content ); -} - -/* - Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 - print just the content of the body element. - useful when you want to reuse material from - other documents. - - -- Sebastiano Vignaor