New merge-emphasis & coerce-endtags options added.

Fixes #19.
This commit is contained in:
Michael[tm] Smith 2012-03-17 16:26:41 +09:00
parent 3ed33a1823
commit 1052c2b81e
7 changed files with 80 additions and 5 deletions

View file

@ -112,6 +112,7 @@ typedef enum
TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */ TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */
TidyIndentContent, /**< Indent content of appropriate tags */ TidyIndentContent, /**< Indent content of appropriate tags */
/**< "auto" does text/block level content indentation */ /**< "auto" does text/block level content indentation */
TidyCoerceEndTags, /**< Coerce end tags from start tags where probably intended */
TidyHideEndTags, /**< Suppress optional end tags */ TidyHideEndTags, /**< Suppress optional end tags */
TidyXmlTags, /**< Treat input as XML */ TidyXmlTags, /**< Treat input as XML */
TidyXmlOut, /**< Create output as XML */ TidyXmlOut, /**< Create output as XML */
@ -199,6 +200,7 @@ typedef enum
#else #else
TidyPunctWrapNotUsed, TidyPunctWrapNotUsed,
#endif #endif
TidyMergeEmphasis, /**< Merge nested B and I elements */
TidyMergeDivs, /**< Merge multiple DIVs */ TidyMergeDivs, /**< Merge multiple DIVs */
TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */ TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */
TidyPreserveEntities, /**< Preserve entities */ TidyPreserveEntities, /**< Preserve entities */

View file

@ -8,7 +8,7 @@
<body> <body>
<h1 id="top">Quick Reference</h1> <h1 id="top">Quick Reference</h1>
<h2>HTML Tidy Configuration Options</h2> <h2>HTML Tidy Configuration Options</h2>
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/5c4d2e5">https://github.com/w3c/tidy-html5/tree/5c4d2e5</a></p> <p>Version: <a href="https://github.com/w3c/tidy-html5/tree/3a9a794">https://github.com/w3c/tidy-html5/tree/3a9a794</a></p>
<p> <p>
<a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a> <a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a>
<br /> <br />
@ -88,6 +88,13 @@
<td>Boolean</td> <td>Boolean</td>
<td>no</td> <td>no</td>
</tr> </tr>
<tr>
<td>
<a href="#coerce-endtags">coerce-endtags</a>
</td>
<td>Boolean</td>
<td>yes</td>
</tr>
<tr> <tr>
<td> <td>
<a href="#css-prefix">css-prefix</a> <a href="#css-prefix">css-prefix</a>
@ -251,6 +258,13 @@
<td>AutoBool</td> <td>AutoBool</td>
<td>auto</td> <td>auto</td>
</tr> </tr>
<tr>
<td>
<a href="#merge-emphasis">merge-emphasis</a>
</td>
<td>Boolean</td>
<td>yes</td>
</tr>
<tr> <tr>
<td> <td>
<a href="#merge-spans">merge-spans</a> <a href="#merge-spans">merge-spans</a>
@ -866,6 +880,23 @@
<tr> <tr>
<td>&#160;</td> <td>&#160;</td>
</tr> </tr>
<tr>
<td class="tabletitle" valign="top" id="coerce-endtags">coerce-endtags</td>
<td class="tabletitlelink" valign="top" align="right">
<a href="#top">Top</a>
</td>
</tr>
<tr>
<td valign="top">Type: <strong>Boolean</strong><br />
Default: <strong>yes</strong><br />Example: <strong>y/n, yes/no, t/f, true/false, 1/0</strong></td>
<td align="right" valign="top"></td>
</tr>
<tr>
<td colspan="2">This option specifies whether Tidy should coerce a start tag into an end tag in cases where an end tag was probably intended; for example, given &lt;span&gt;foo &lt;b&gt;bar&lt;b&gt; baz&lt;/span&gt;, Tidy will output &lt;span&gt;foo &lt;b&gt;bar&lt;/b&gt; baz&lt;/span&gt;. </td>
</tr>
<tr>
<td>&#160;</td>
</tr>
<tr> <tr>
<td class="tabletitle" valign="top" id="css-prefix">css-prefix</td> <td class="tabletitle" valign="top" id="css-prefix">css-prefix</td>
<td class="tabletitlelink" valign="top" align="right"> <td class="tabletitlelink" valign="top" align="right">
@ -1271,6 +1302,23 @@
<tr> <tr>
<td>&#160;</td> <td>&#160;</td>
</tr> </tr>
<tr>
<td class="tabletitle" valign="top" id="merge-emphasis">merge-emphasis</td>
<td class="tabletitlelink" valign="top" align="right">
<a href="#top">Top</a>
</td>
</tr>
<tr>
<td valign="top">Type: <strong>Boolean</strong><br />
Default: <strong>yes</strong><br />Example: <strong>y/n, yes/no, t/f, true/false, 1/0</strong></td>
<td align="right" valign="top"></td>
</tr>
<tr>
<td colspan="2">This option specifies whether Tidy should merge nested &lt;b&gt; and &lt;i&gt; elements; for example, for the case &lt;b class="rtop-2"&gt;foo &lt;b class="r2-2"&gt;bar&lt;/b&gt; baz&lt;/b&gt;, Tidy will output &lt;b class="rtop-2"&gt;foo bar baz&lt;/b&gt;. Note that if you set this option to "no", Tidy will by default still attempt to correct some cases of nested &lt;b&gt; and &lt;i&gt; elements; for example, given &lt;span&gt;foo &lt;b&gt;bar &lt;b&gt;baz&lt;/b&gt;&lt;/b&gt; &lt;/span&gt;, Tidy will output &lt;span&gt;foo &lt;b&gt;bar baz&lt;/b&gt;&lt;/span&gt;. To suppress that behavior, set the coerce-endtags option to "no". </td>
</tr>
<tr>
<td>&#160;</td>
</tr>
<tr> <tr>
<td class="tabletitle" valign="top" id="merge-spans">merge-spans</td> <td class="tabletitle" valign="top" id="merge-spans">merge-spans</td>
<td class="tabletitlelink" valign="top" align="right"> <td class="tabletitlelink" valign="top" align="right">

View file

@ -239,6 +239,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks }, { TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks },
{ TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks }, { TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks },
{ TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks }, { TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks },
{ TidyCoerceEndTags, MU, "coerce-endtags", BL, yes, ParseBool, boolPicks },
{ TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks }, { TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks },
{ TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks }, { TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks },
{ TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks }, { TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks },
@ -311,6 +312,7 @@ static const TidyOptionImpl option_defs[] =
#if SUPPORT_ASIAN_ENCODINGS #if SUPPORT_ASIAN_ENCODINGS
{ TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks }, { TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks },
#endif #endif
{ TidyMergeEmphasis, MU, "merge-emphasis", BL, yes, ParseBool, boolPicks },
{ TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks }, { TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
{ TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks }, { TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks },
{ TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks }, { TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks },

View file

@ -470,6 +470,12 @@ static const TidyOptionDoc option_docs[] =
{TidyHideComments, {TidyHideComments,
"This option specifies if Tidy should print out comments. " "This option specifies if Tidy should print out comments. "
}, },
{TidyCoerceEndTags,
"This option specifies if Tidy should coerce a start tag into an end tag "
"in cases where it looks like an end tag was probably intended; "
"for example, given &lt;span&gt;foo &lt;b&gt;bar&lt;b&gt; baz&lt;/span&gt;, "
"Tidy will output &lt;span&gt;foo &lt;b&gt;bar&lt;/b&gt; baz&lt;/span&gt;. "
},
{TidyHideEndTags, {TidyHideEndTags,
"This option specifies if Tidy should omit optional end-tags when " "This option specifies if Tidy should omit optional end-tags when "
"generating the pretty printed markup. This option is ignored if you are " "generating the pretty printed markup. This option is ignored if you are "
@ -504,6 +510,18 @@ static const TidyOptionDoc option_docs[] =
"that takes a list of predefined values to lower case. This is required " "that takes a list of predefined values to lower case. This is required "
"for XHTML documents. " "for XHTML documents. "
}, },
{TidyMergeEmphasis,
"This option specifies if Tidy should merge nested &lt;b&gt; and &lt;i&gt; "
"elements; for example, for the case "
"&lt;b class=\"rtop-2\"&gt;foo &lt;b class=\"r2-2\"&gt;bar&lt;/b&gt; baz&lt;/b&gt;, "
"Tidy will output &lt;b class=\"rtop-2\"&gt;foo bar baz&lt;/b&gt;. "
"Note that if you set this option to \"no\" Tidy will by default still "
"attempt to correct some cases of nested &lt;b&gt; and &lt;i&gt; elements; "
"for example, given "
"&lt;span&gt;foo &lt;b&gt;bar &lt;b&gt;baz&lt;/b&gt;&lt;/b&gt; &lt;/span&gt;, "
"Tidy will output &lt;span&gt;foo &lt;b&gt;bar baz&lt;/b&gt;&lt;/span&gt;. "
"To suppress that behavior, set the coerce-endtags option to \"no\". "
},
{TidyMergeDivs, {TidyMergeDivs,
"Can be used to modify behavior of -c (--clean yes) option. " "Can be used to modify behavior of -c (--clean yes) option. "
"This option specifies if Tidy should merge nested &lt;div&gt; such as " "This option specifies if Tidy should merge nested &lt;div&gt; such as "

View file

@ -1400,6 +1400,7 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
&& !nodeIsSUP(node) && !nodeIsSUP(node)
&& !nodeIsQ(node) && !nodeIsQ(node)
&& !nodeIsSPAN(node) && !nodeIsSPAN(node)
&& cfgBool(doc, TidyCoerceEndTags)
) )
{ {
/* proceeds only if "node" does not have any attribute and /* proceeds only if "node" does not have any attribute and
@ -1617,7 +1618,8 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
/* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */ /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */
/* other fixes by Dave Raggett */ /* other fixes by Dave Raggett */
/* if (node->attributes == NULL) */ /* if (node->attributes == NULL) */
if (node->type != EndTag && node->attributes == NULL) if (node->type != EndTag && node->attributes == NULL
&& cfgBool(doc, TidyCoerceEndTags) )
{ {
node->type = EndTag; node->type = EndTag;
TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG); TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG);
@ -3022,7 +3024,8 @@ void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode
Node *node; Node *node;
while ((node = TY_(GetToken)(doc, MixedContent)) != NULL) while ((node = TY_(GetToken)(doc, MixedContent)) != NULL)
{ {
if (node->tag == title->tag && node->type == StartTag) if (node->tag == title->tag && node->type == StartTag
&& cfgBool(doc, TidyCoerceEndTags) )
{ {
TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG); TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG);
node->type = EndTag; node->type = EndTag;

View file

@ -1246,13 +1246,15 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool tidyMark = cfgBool( doc, TidyMark ); Bool tidyMark = cfgBool( doc, TidyMark );
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags ); Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName ); Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
Node* node; Node* node;
if (tidyXmlTags) if (tidyXmlTags)
return tidyDocStatus( doc ); return tidyDocStatus( doc );
/* simplifies <b><b> ... </b> ...</b> etc. */ /* simplifies <b><b> ... </b> ...</b> etc. */
TY_(NestedEmphasis)( doc, &doc->root ); if ( mergeEmphasis )
TY_(NestedEmphasis)( doc, &doc->root );
/* cleans up <dir>indented text</dir> etc. */ /* cleans up <dir>indented text</dir> etc. */
TY_(List2BQ)( doc, &doc->root ); TY_(List2BQ)( doc, &doc->root );

View file

@ -1 +1 @@
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/3a9a794"; static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/3ed33a1";