diff --git a/include/tidyenum.h b/include/tidyenum.h
index 6f4ff19..d88e86d 100755
--- a/include/tidyenum.h
+++ b/include/tidyenum.h
@@ -210,6 +210,9 @@ extern "C" {
FN(INVALID_XML_ID) \
FN(JOINING_ATTRIBUTE) \
FN(MALFORMED_COMMENT) \
+ FN(MALFORMED_COMMENT_DROPPING) \
+ FN(MALFORMED_COMMENT_EOS) \
+ FN(MALFORMED_COMMENT_WARN) \
FN(MALFORMED_DOCTYPE) \
FN(MISMATCHED_ATTRIBUTE_ERROR) \
FN(MISMATCHED_ATTRIBUTE_WARN) \
diff --git a/localize/translations/tidy.pot b/localize/translations/tidy.pot
index 3a5c734..3201df1 100644
--- a/localize/translations/tidy.pot
+++ b/localize/translations/tidy.pot
@@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n"
-"POT-Creation-Date: 2017-09-23 07:53:22\n"
+"POT-Creation-Date: 2017-09-24 18:12:27\n"
"Last-Translator: jderry\n"
"Language-Team: \n"
@@ -466,10 +466,12 @@ msgid ""
"This option specifies if Tidy should replace unexpected hyphens with "
"=
characters when it comes across adjacent hyphens. "
"
"
-"The default is yes. "
+"The default is no. "
"
"
-"This option is provided for users of Cold Fusion which uses the "
-"comment syntax: <!--- --->
. "
+"HTML has abandoned SGML comment syntax, and allows adjacent hyphens "
+"for all versions of HTML, although XML and XHTML do not. If you plan "
+"to support older browsers that require SGML comment syntax, then "
+"consider setting this value to yes."
msgstr ""
#. Important notes for translators:
@@ -2300,7 +2302,19 @@ msgid "%s joining values of repeated attribute \"%s\""
msgstr ""
msgctxt "MALFORMED_COMMENT"
-msgid "adjacent hyphens within comment"
+msgid "tidy replaced adjacent \"-\" with \"=\""
+msgstr ""
+
+msgctxt "MALFORMED_COMMENT_DROPPING"
+msgid "dropping a possible comment due to a missing hyphen"
+msgstr ""
+
+msgctxt "MALFORMED_COMMENT_EOS"
+msgid "the end of the document was reached before the end of the comment"
+msgstr ""
+
+msgctxt "MALFORMED_COMMENT_WARN"
+msgid "detected adjacent hyphens within the comment; consider fix-bad-comments"
msgstr ""
msgctxt "MALFORMED_DOCTYPE"
diff --git a/src/config.c b/src/config.c
index 8de74c6..9a27b37 100644
--- a/src/config.c
+++ b/src/config.c
@@ -241,7 +241,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyEscapeCdata, MU, "escape-cdata", BL, no, ParsePickList, &boolPicks },
{ TidyEscapeScripts, PP, "escape-scripts", BL, yes, ParsePickList, &boolPicks }, /* 20160227 - Issue #348 */
{ TidyFixBackslash, MU, "fix-backslash", BL, yes, ParsePickList, &boolPicks },
- { TidyFixComments, MU, "fix-bad-comments", BL, yes, ParsePickList, &boolPicks },
+ { TidyFixComments, MU, "fix-bad-comments", BL, no, ParsePickList, &boolPicks },
{ TidyFixUri, MU, "fix-uri", BL, yes, ParsePickList, &boolPicks },
{ TidyForceOutput, MS, "force-output", BL, no, ParsePickList, &boolPicks },
{ TidyGDocClean, MU, "gdoc", BL, no, ParsePickList, &boolPicks },
diff --git a/src/language_en.h b/src/language_en.h
index 18db40c..f97bfd4 100755
--- a/src/language_en.h
+++ b/src/language_en.h
@@ -476,10 +476,12 @@ static languageDefinition language_en = { whichPluralForm_en, {
"This option specifies if Tidy should replace unexpected hyphens with "
"=
characters when it comes across adjacent hyphens. "
"
"
- "The default is yes. "
+ "The default is no. "
"
"
- "This option is provided for users of Cold Fusion which uses the "
- "comment syntax: <!--- --->
. "
+ "HTML has abandoned SGML comment syntax, and allows adjacent hyphens "
+ "for all versions of HTML, although XML and XHTML do not. If you plan "
+ "to support older browsers that require SGML comment syntax, then "
+ "consider setting this value to yes."
},
{/* Important notes for translators:
- Use only
, , , , and
@@ -1886,7 +1888,10 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" },
{ INVALID_XML_ID, 0, "%s cannot copy name attribute to id" },
{ JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" },
- { MALFORMED_COMMENT, 0, "adjacent hyphens within comment" },
+ { MALFORMED_COMMENT, 0, "tidy replaced adjacent \"-\" with \"=\"" },
+ { MALFORMED_COMMENT_DROPPING, 0, "dropping a possible comment due to a missing hyphen" },
+ { MALFORMED_COMMENT_EOS, 0, "the end of the document was reached before the end of the comment" },
+ { MALFORMED_COMMENT_WARN, 0, "detected adjacent hyphens within the comment; consider fix-bad-comments" },
{ MALFORMED_DOCTYPE, 0, "discarding malformed " },
{ MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" },
{ MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" },
diff --git a/src/language_fr.h b/src/language_fr.h
index 0aa4d45..aca77db 100644
--- a/src/language_fr.h
+++ b/src/language_fr.h
@@ -1,4 +1,4 @@
-#ifndef language_fr_h
+#ifndef language_fr_h
#define language_fr_h
/*
* language_fr.h
@@ -201,7 +201,7 @@ static languageDefinition language_fr = { whichPluralForm_fr, {
{ TidyFixComments, 0,
"Cette option précise si Tidy doit remplacer les traits d'unions par le caractère =
"
"s'il rencontre des traits-d'unions adjacents.
La valeur par défaut est yes.
Cette option est fourni pour les utilisateurs de Cold Fusion qui utilisent la syntaxe de "
+ ">Cette option est fourni pour les utilisateurs de ColdFusion qui utilisent la syntaxe de "
"commentaire: <!--- --->
. "
},
{ TidyFixUri, 0,
diff --git a/src/lexer.c b/src/lexer.c
index 4ff4388..4cc1f35 100755
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -2613,6 +2613,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
Bool isempty = no;
AttVal *attributes = NULL;
Node *node;
+ Bool fixComments = cfgBool(doc, TidyFixComments);
/* Lexer->token must be set on return. Nullify it for safety. */
lexer->token = NULL;
@@ -2772,7 +2773,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
continue;
}
- TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
+ /*
+ We only print this message if there's a missing
+ starting hyphen; this comment will be dropped.
+ */
+ TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING );
}
else if (c == 'd' || c == 'D')
{
@@ -3045,6 +3050,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
continue;
c = TY_(ReadChar)(doc->docIn);
+
+ /* Fix hyphens at beginning of tag */
+ if ( c != '-' && fixComments && lexer->txtstart - lexer->txtend == 0 )
+ {
+ lexer->lexbuf[lexer->lexsize - 1] = '=';
+ }
+
TY_(AddCharToLexer)(lexer, c);
if (c != '-')
@@ -3056,7 +3068,26 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
if (c == '>')
{
if (badcomment)
- TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
+ {
+ /*
+ We've got bad comments that we either fixed or
+ ignored; provide proper user feedback based on
+ doctype and whether or not we fixed them.
+ */
+ if ( (TY_(HTMLVersion)(doc) & HT50) )
+ {
+ if ( fixComments )
+ TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
+ /* Otherwise for HTML5, it's safe to ignore. */
+ }
+ else
+ {
+ if ( fixComments )
+ TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
+ else
+ TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_WARN );
+ }
+ }
/* do not store closing -- in lexbuf */
lexer->lexsize -= 2;
@@ -3089,7 +3120,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
badcomment++;
- if ( cfgBool(doc, TidyFixComments) )
+ /* fix hyphens in the middle */
+ if ( fixComments )
lexer->lexbuf[lexer->lexsize - 2] = '=';
/* if '-' then look for '>' to end the comment */
@@ -3099,8 +3131,9 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
goto end_comment;
}
- /* otherwise continue to look for --> */
- lexer->lexbuf[lexer->lexsize - 1] = '=';
+ /* fix hyphens end, and continue to look for --> */
+ if ( fixComments )
+ lexer->lexbuf[lexer->lexsize - 1] = '=';
/* http://tidy.sf.net/bug/1266647 */
TY_(AddCharToLexer)(lexer, c);
@@ -3482,7 +3515,10 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
else if (lexer->state == LEX_COMMENT) /* comment */
{
if (c == EndOfStream)
- TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
+ {
+ /* We print this if we reached end of the stream mid-comment. */
+ TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_EOS );
+ }
lexer->txtend = lexer->lexsize;
lexer->lexbuf[lexer->lexsize] = '\0';
diff --git a/src/message.c b/src/message.c
index 409f571..dbfd303 100755
--- a/src/message.c
+++ b/src/message.c
@@ -298,7 +298,10 @@ static struct _dispatchTable {
{ INVALID_UTF16, TidyWarning, formatEncodingReport },
{ INVALID_XML_ID, TidyWarning, formatAttributeReport },
{ JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport },
- { MALFORMED_COMMENT, TidyWarning, formatStandard },
+ { MALFORMED_COMMENT, TidyInfo, formatStandard },
+ { MALFORMED_COMMENT_EOS, TidyError, formatStandard },
+ { MALFORMED_COMMENT_DROPPING, TidyWarning, formatStandard },
+ { MALFORMED_COMMENT_WARN, TidyWarning, formatStandard },
{ MALFORMED_DOCTYPE, TidyWarning, formatStandard },
{ MISMATCHED_ATTRIBUTE_ERROR, TidyError, formatAttributeReport },
{ MISMATCHED_ATTRIBUTE_WARN, TidyWarning, formatAttributeReport },
@@ -777,6 +780,9 @@ TidyMessageImpl *formatStandard(TidyDocImpl* doc, Node *element, Node *node, uin
case DOCTYPE_AFTER_TAGS:
case DUPLICATE_FRAMESET:
case MALFORMED_COMMENT:
+ case MALFORMED_COMMENT_DROPPING:
+ case MALFORMED_COMMENT_EOS:
+ case MALFORMED_COMMENT_WARN:
case MALFORMED_DOCTYPE:
case MISSING_DOCTYPE:
case MISSING_TITLE_ELEMENT:
diff --git a/src/parser.c b/src/parser.c
index 168823c..f27e726 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -69,7 +69,7 @@ Bool TY_(CheckNodeIntegrity)(Node *node)
used to determine how attributes
without values should be printed
this was introduced to deal with
- user defined tags e.g. Cold Fusion
+ user defined tags e.g. ColdFusion
*/
Bool TY_(IsNewNode)(Node *node)
{
diff --git a/src/parser.h b/src/parser.h
index 3ad42b8..c8e1b61 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -18,7 +18,7 @@ Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
used to determine how attributes
without values should be printed
this was introduced to deal with
- user defined tags e.g. Cold Fusion
+ user defined tags e.g. ColdFusion
*/
Bool TY_(IsNewNode)(Node *node);