From 1052c2b81e2e58eec6e2fb08a5ffa48e19073c89 Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Sat, 17 Mar 2012 16:26:41 +0900 Subject: [PATCH] New merge-emphasis & coerce-endtags options added. Fixes #19. --- include/tidyenum.h | 2 ++ quickref.html | 50 +++++++++++++++++++++++++++++++++++++++++++++- src/config.c | 2 ++ src/localize.c | 18 +++++++++++++++++ src/parser.c | 7 +++++-- src/tidylib.c | 4 +++- src/version.h | 2 +- 7 files changed, 80 insertions(+), 5 deletions(-) diff --git a/include/tidyenum.h b/include/tidyenum.h index a910ec7..91da43a 100644 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -112,6 +112,7 @@ typedef enum TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */ TidyIndentContent, /**< Indent content of appropriate tags */ /**< "auto" does text/block level content indentation */ + TidyCoerceEndTags, /**< Coerce end tags from start tags where probably intended */ TidyHideEndTags, /**< Suppress optional end tags */ TidyXmlTags, /**< Treat input as XML */ TidyXmlOut, /**< Create output as XML */ @@ -199,6 +200,7 @@ typedef enum #else TidyPunctWrapNotUsed, #endif + TidyMergeEmphasis, /**< Merge nested B and I elements */ TidyMergeDivs, /**< Merge multiple DIVs */ TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */ TidyPreserveEntities, /**< Preserve entities */ diff --git a/quickref.html b/quickref.html index 8623f8c..a026f2d 100644 --- a/quickref.html +++ b/quickref.html @@ -8,7 +8,7 @@

Quick Reference

HTML Tidy Configuration Options

-

Version: https://github.com/w3c/tidy-html5/tree/5c4d2e5

+

Version: https://github.com/w3c/tidy-html5/tree/3a9a794

HTML, XHTML, XML
@@ -88,6 +88,13 @@ Boolean no + + + coerce-endtags + + Boolean + yes + css-prefix @@ -251,6 +258,13 @@ AutoBool auto + + + merge-emphasis + + Boolean + yes + merge-spans @@ -866,6 +880,23 @@   + + coerce-endtags + + Top + + + + Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0 + + + + This option specifies if Tidy should coerce a start tag into an end tag in cases where it looks like an end tag was probably intended; for example, given <span>foo <b>bar<b> baz</span>, Tidy will output <span>foo <b>bar</b> baz</span>. + + +   + css-prefix @@ -1271,6 +1302,23 @@   + + merge-emphasis + + Top + + + + Type: Boolean
+ Default: yes
Example: y/n, yes/no, t/f, true/false, 1/0 + + + + This option specifies if Tidy should merge nested <b> and <i> elements; for example, for the case <b class="rtop-2">foo <b class="r2-2">bar</b> baz</b>, Tidy will output <b class="rtop-2">foo bar baz</b>. Note that if you set this option to "no" Tidy will by default still attempt to correct some cases of nested <b> and <i> elements; for example, given <span>foo <b>bar <b>baz</b></b> </span>, Tidy will output <span>foo <b>bar baz</b></span>. To suppress that behavior, set the coerce-endtags option to "no". + + +   + merge-spans diff --git a/src/config.c b/src/config.c index b48d910..0f42596 100644 --- a/src/config.c +++ b/src/config.c @@ -239,6 +239,7 @@ static const TidyOptionImpl option_defs[] = { TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks }, { TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks }, { TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks }, + { TidyCoerceEndTags, MU, "coerce-endtags", BL, yes, ParseBool, boolPicks }, { TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks }, { TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks }, { TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks }, @@ -311,6 +312,7 @@ static const TidyOptionImpl option_defs[] = #if SUPPORT_ASIAN_ENCODINGS { TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks }, #endif + { TidyMergeEmphasis, MU, "merge-emphasis", BL, yes, ParseBool, boolPicks }, { TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks }, { TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks }, { TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks }, diff --git a/src/localize.c b/src/localize.c index 43d4359..9a07d84 100644 --- a/src/localize.c +++ b/src/localize.c @@ -470,6 +470,12 @@ static const TidyOptionDoc option_docs[] = {TidyHideComments, "This option specifies if Tidy should print out comments. " }, + {TidyCoerceEndTags, + "This option specifies if Tidy should coerce a start tag into an end tag " + "in cases where it looks like an end tag was probably intended; " + "for example, given <span>foo <b>bar<b> baz</span>, " + "Tidy will output <span>foo <b>bar</b> baz</span>. " + }, {TidyHideEndTags, "This option specifies if Tidy should omit optional end-tags when " "generating the pretty printed markup. This option is ignored if you are " @@ -504,6 +510,18 @@ static const TidyOptionDoc option_docs[] = "that takes a list of predefined values to lower case. This is required " "for XHTML documents. " }, + {TidyMergeEmphasis, + "This option specifies if Tidy should merge nested <b> and <i> " + "elements; for example, for the case " + "<b class=\"rtop-2\">foo <b class=\"r2-2\">bar</b> baz</b>, " + "Tidy will output <b class=\"rtop-2\">foo bar baz</b>. " + "Note that if you set this option to \"no\" Tidy will by default still " + "attempt to correct some cases of nested <b> and <i> elements; " + "for example, given " + "<span>foo <b>bar <b>baz</b></b> </span>, " + "Tidy will output <span>foo <b>bar baz</b></span>. " + "To suppress that behavior, set the coerce-endtags option to \"no\". " + }, {TidyMergeDivs, "Can be used to modify behavior of -c (--clean yes) option. " "This option specifies if Tidy should merge nested <div> such as " diff --git a/src/parser.c b/src/parser.c index 46d1728..85108f8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1400,6 +1400,7 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) && !nodeIsSUP(node) && !nodeIsQ(node) && !nodeIsSPAN(node) + && cfgBool(doc, TidyCoerceEndTags) ) { /* proceeds only if "node" does not have any attribute and @@ -1617,7 +1618,8 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode ) /* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */ /* other fixes by Dave Raggett */ /* if (node->attributes == NULL) */ - if (node->type != EndTag && node->attributes == NULL) + if (node->type != EndTag && node->attributes == NULL + && cfgBool(doc, TidyCoerceEndTags) ) { node->type = EndTag; TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG); @@ -3022,7 +3024,8 @@ void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode Node *node; while ((node = TY_(GetToken)(doc, MixedContent)) != NULL) { - if (node->tag == title->tag && node->type == StartTag) + if (node->tag == title->tag && node->type == StartTag + && cfgBool(doc, TidyCoerceEndTags) ) { TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG); node->type = EndTag; diff --git a/src/tidylib.c b/src/tidylib.c index 553e4de..0ff8cd6 100644 --- a/src/tidylib.c +++ b/src/tidylib.c @@ -1246,13 +1246,15 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc ) Bool tidyMark = cfgBool( doc, TidyMark ); Bool tidyXmlTags = cfgBool( doc, TidyXmlTags ); Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName ); + Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis ); Node* node; if (tidyXmlTags) return tidyDocStatus( doc ); /* simplifies ... ... etc. */ - TY_(NestedEmphasis)( doc, &doc->root ); + if ( mergeEmphasis ) + TY_(NestedEmphasis)( doc, &doc->root ); /* cleans up

indented text etc. */ TY_(List2BQ)( doc, &doc->root ); diff --git a/src/version.h b/src/version.h index 92242c9..4124ec8 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/3a9a794"; \ No newline at end of file +static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/3ed33a1"; \ No newline at end of file