Address issue #476, which started out as a simple message update.

- Change default value of `--fix-bad-comments` to `no`. - Ensure that when _not_ fixing, nothing is actually fixed. - Ensure that when fixing, initial adjacent hyphens actually are fixed. - Issue tidyinfo for all fixes made. - Issue tidywarning when not making fixes for non-HTML5 doctypes.
2017-09-24 18:15:40 -04:00 · 2017-09-24 18:15:40 -04:00 · 483e0fec9e
parent cd9d46b53d
commit 483e0fec9e
9 changed files with 85 additions and 21 deletions
--- a/include/tidyenum.h
+++ b/include/tidyenum.h
@ -210,6 +210,9 @@ extern "C" {
    FN(INVALID_XML_ID)                \
    FN(JOINING_ATTRIBUTE)             \
    FN(MALFORMED_COMMENT)             \
    FN(MALFORMED_COMMENT_DROPPING)    \
    FN(MALFORMED_COMMENT_EOS)         \
    FN(MALFORMED_COMMENT_WARN)        \
    FN(MALFORMED_DOCTYPE)             \
    FN(MISMATCHED_ATTRIBUTE_ERROR)    \
    FN(MISMATCHED_ATTRIBUTE_WARN)     \
--- a/localize/translations/tidy.pot
+++ b/localize/translations/tidy.pot
@ -5,7 +5,7 @@ msgstr ""
 "Plural-Forms: nplurals=2; plural=n != 1;\n"
 "X-Generator: HTML Tidy poconvert.rb\n"
 "Project-Id-Version: \n"
-"POT-Creation-Date: 2017-09-23 07:53:22\n"
+"POT-Creation-Date: 2017-09-24 18:12:27\n"
 "Last-Translator: jderry\n"
 "Language-Team: \n"
@ -466,10 +466,12 @@ msgid ""
 "This option specifies if Tidy should replace unexpected hyphens with "
 "<code>=</code> characters when it comes across adjacent hyphens. "
 "<br/>"
-"The default is <var>yes</var>. "
+"The default is <var>no</var>. "
 "<br/>"
-"This option is provided for users of Cold Fusion which uses the "
+"HTML has abandonded SGML comment syntax, and allows adjacent hypens "
-"comment syntax: <code>&lt;!--- ---&gt;</code>. "
+"for all versions of HTML, although XML and XHTML do not. If you plan "
 "to support older browsers that require SGML comment syntax, then "
 "consider setting this value to <var>yes</var>."
 msgstr ""
 #. Important notes for translators:
@ -2300,7 +2302,19 @@ msgid "%s joining values of repeated attribute \"%s\""
 msgstr ""
 msgctxt "MALFORMED_COMMENT"
-msgid "adjacent hyphens within comment"
+msgid "tidy replaced adjacent \"-\" with \"=\""
 msgstr ""
 msgctxt "MALFORMED_COMMENT_DROPPING"
 msgid "dropping a possible comment due to a missing hyphen"
 msgstr ""
 msgctxt "MALFORMED_COMMENT_EOS"
 msgid "the end of the document was reached before the end of the comment"
 msgstr ""
 msgctxt "MALFORMED_COMMENT_WARN"
 msgid "detected adjacent hyphens within the comment; consider fix-bad-comments"
 msgstr ""
 msgctxt "MALFORMED_DOCTYPE"
--- a/src/config.c
+++ b/src/config.c
@ -241,7 +241,7 @@ static const TidyOptionImpl option_defs[] =
    { TidyEscapeCdata,             MU, "escape-cdata",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyEscapeScripts,           PP, "escape-scripts",              BL, yes,             ParsePickList,     &boolPicks          }, /* 20160227 - Issue #348 */
    { TidyFixBackslash,            MU, "fix-backslash",               BL, yes,             ParsePickList,     &boolPicks          },
-    { TidyFixComments,             MU, "fix-bad-comments",            BL, yes,             ParsePickList,     &boolPicks          },
+    { TidyFixComments,             MU, "fix-bad-comments",            BL, no,              ParsePickList,     &boolPicks          },
    { TidyFixUri,                  MU, "fix-uri",                     BL, yes,             ParsePickList,     &boolPicks          },
    { TidyForceOutput,             MS, "force-output",                BL, no,              ParsePickList,     &boolPicks          },
    { TidyGDocClean,               MU, "gdoc",                        BL, no,              ParsePickList,     &boolPicks          },
--- a/src/language_en.h
+++ b/src/language_en.h
@ -476,10 +476,12 @@ static languageDefinition language_en = { whichPluralForm_en, {
        "This option specifies if Tidy should replace unexpected hyphens with "
        "<code>=</code> characters when it comes across adjacent hyphens. "
        "<br/>"
-        "The default is <var>yes</var>. "
+        "The default is <var>no</var>. "
        "<br/>"
-        "This option is provided for users of Cold Fusion which uses the "
+        "HTML has abandonded SGML comment syntax, and allows adjacent hypens "
-        "comment syntax: <code>&lt;!--- ---&gt;</code>. "
+        "for all versions of HTML, although XML and XHTML do not. If you plan "
        "to support older browsers that require SGML comment syntax, then "
        "consider setting this value to <var>yes</var>."
    },
    {/* Important notes for translators:
      - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
@ -1886,7 +1888,10 @@ static languageDefinition language_en = { whichPluralForm_en, {
    { INVALID_UTF16,                0,   "%s invalid UTF-16 surrogate pair (char. code %s)"                        },
    { INVALID_XML_ID,               0,   "%s cannot copy name attribute to id"                                     },
    { JOINING_ATTRIBUTE,            0,   "%s joining values of repeated attribute \"%s\""                          },
-    { MALFORMED_COMMENT,            0,   "adjacent hyphens within comment"                                         },
+    { MALFORMED_COMMENT,            0,   "tidy replaced adjacent \"-\" with \"=\""                                 },
    { MALFORMED_COMMENT_DROPPING,   0,   "dropping a possible comment due to a missing hyphen"                     },
    { MALFORMED_COMMENT_EOS,        0,   "the end of the document was reached before the end of the comment"       },
    { MALFORMED_COMMENT_WARN,       0,   "detected adjacent hyphens within the comment; consider fix-bad-comments" },
    { MALFORMED_DOCTYPE,            0,   "discarding malformed <!DOCTYPE>"                                         },
    { MISMATCHED_ATTRIBUTE_ERROR,   0,   "%s attribute \"%s\" not allowed for %s"                                  },
    { MISMATCHED_ATTRIBUTE_WARN,    0,   "%s attribute \"%s\" not allowed for %s"                                  },
--- a/src/language_fr.h
+++ b/src/language_fr.h
@ -1,4 +1,4 @@
-#ifndef language_fr_h
+#ifndef language_fr_h
 #define language_fr_h
 /*
 * language_fr.h
--- a/src/lexer.c
+++ b/src/lexer.c
@ -2613,6 +2613,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
    Bool isempty = no;
    AttVal *attributes = NULL;
    Node *node;
    Bool fixComments = cfgBool(doc, TidyFixComments);
    /* Lexer->token must be set on return. Nullify it for safety. */
    lexer->token = NULL;
@ -2772,7 +2773,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
                            continue;
                        }
-                        TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
+                        /*
                           We only print this message if there's a missing
                           starting hyphen; this comment will be dropped.
                         */
                        TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING );
                    }
                    else if (c == 'd' || c == 'D')
                    {
@ -3045,6 +3050,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
                    continue;
                c = TY_(ReadChar)(doc->docIn);
                /* Fix hyphens at beginning of tag */
                if ( c != '-' && fixComments && lexer->txtstart - lexer->txtend == 0 )
                {
                    lexer->lexbuf[lexer->lexsize - 1] = '=';
                }
                TY_(AddCharToLexer)(lexer, c);
                if (c != '-')
@ -3056,7 +3068,26 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
                if (c == '>')
                {
                    if (badcomment)
                    {
                        /*
                           We've got bad comments that we either fixed or
                           ignored; provide proper user feedback based on
                           doctype and whether or not we fixed them.
                         */
                        if ( (TY_(HTMLVersion)(doc) & HT50) )
                        {
                            if ( fixComments )
                                TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
                            /* Otherwise for HTML5, it's safe to ignore. */
                        }
                        else
                        {
                            if ( fixComments )
                                TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
                            else
                                TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_WARN );
                        }
                    }
                    /* do not store closing -- in lexbuf */
                    lexer->lexsize -= 2;
@ -3089,7 +3120,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
                badcomment++;
-                if ( cfgBool(doc, TidyFixComments) )
+                /* fix hyphens in the middle */
                if ( fixComments )
                    lexer->lexbuf[lexer->lexsize - 2] = '=';
                /* if '-' then look for '>' to end the comment */
@ -3099,7 +3131,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
                    goto end_comment;
                }
-                /* otherwise continue to look for --> */
+                /* fix hyphens end, and continue to look for --> */
                if ( fixComments )
                    lexer->lexbuf[lexer->lexsize - 1] = '=';
                /* http://tidy.sf.net/bug/1266647 */
@ -3482,7 +3515,10 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
    else if (lexer->state == LEX_COMMENT) /* comment */
    {
        if (c == EndOfStream)
-            TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
+        {
            /* We print this if we reached end of the stream mid-comment. */
            TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_EOS );
        }
        lexer->txtend = lexer->lexsize;
        lexer->lexbuf[lexer->lexsize] = '\0';
--- a/src/message.c
+++ b/src/message.c
@ -298,7 +298,10 @@ static struct _dispatchTable {
    { INVALID_UTF16,                TidyWarning,     formatEncodingReport    },
    { INVALID_XML_ID,               TidyWarning,     formatAttributeReport   },
    { JOINING_ATTRIBUTE,            TidyWarning,     formatAttributeReport   },
-    { MALFORMED_COMMENT,            TidyWarning,     formatStandard          },
+    { MALFORMED_COMMENT,            TidyInfo,        formatStandard          },
    { MALFORMED_COMMENT_EOS,        TidyError,       formatStandard          },
    { MALFORMED_COMMENT_DROPPING,   TidyWarning,     formatStandard          },
    { MALFORMED_COMMENT_WARN,       TidyWarning,     formatStandard          },
    { MALFORMED_DOCTYPE,            TidyWarning,     formatStandard          },
    { MISMATCHED_ATTRIBUTE_ERROR,   TidyError,       formatAttributeReport   },
    { MISMATCHED_ATTRIBUTE_WARN,    TidyWarning,     formatAttributeReport   },
@ -777,6 +780,9 @@ TidyMessageImpl *formatStandard(TidyDocImpl* doc, Node *element, Node *node, uin
        case DOCTYPE_AFTER_TAGS:
        case DUPLICATE_FRAMESET:
        case MALFORMED_COMMENT:
        case MALFORMED_COMMENT_DROPPING:
        case MALFORMED_COMMENT_EOS:
        case MALFORMED_COMMENT_WARN:
        case MALFORMED_DOCTYPE:
        case MISSING_DOCTYPE:
        case MISSING_TITLE_ELEMENT: