diff --git a/include/tidyenum.h b/include/tidyenum.h index 6f4ff19..d88e86d 100755 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -210,6 +210,9 @@ extern "C" { FN(INVALID_XML_ID) \ FN(JOINING_ATTRIBUTE) \ FN(MALFORMED_COMMENT) \ + FN(MALFORMED_COMMENT_DROPPING) \ + FN(MALFORMED_COMMENT_EOS) \ + FN(MALFORMED_COMMENT_WARN) \ FN(MALFORMED_DOCTYPE) \ FN(MISMATCHED_ATTRIBUTE_ERROR) \ FN(MISMATCHED_ATTRIBUTE_WARN) \ diff --git a/localize/translations/tidy.pot b/localize/translations/tidy.pot index 3a5c734..3201df1 100644 --- a/localize/translations/tidy.pot +++ b/localize/translations/tidy.pot @@ -5,7 +5,7 @@ msgstr "" "Plural-Forms: nplurals=2; plural=n != 1;\n" "X-Generator: HTML Tidy poconvert.rb\n" "Project-Id-Version: \n" -"POT-Creation-Date: 2017-09-23 07:53:22\n" +"POT-Creation-Date: 2017-09-24 18:12:27\n" "Last-Translator: jderry\n" "Language-Team: \n" @@ -466,10 +466,12 @@ msgid "" "This option specifies if Tidy should replace unexpected hyphens with " "= characters when it comes across adjacent hyphens. " "
" -"The default is yes. " +"The default is no. " "
" -"This option is provided for users of Cold Fusion which uses the " -"comment syntax: <!--- --->. " +"HTML has abandoned SGML comment syntax, and allows adjacent hyphens " +"for all versions of HTML, although XML and XHTML do not. If you plan " +"to support older browsers that require SGML comment syntax, then " +"consider setting this value to yes." msgstr "" #. Important notes for translators: @@ -2300,7 +2302,19 @@ msgid "%s joining values of repeated attribute \"%s\"" msgstr "" msgctxt "MALFORMED_COMMENT" -msgid "adjacent hyphens within comment" +msgid "tidy replaced adjacent \"-\" with \"=\"" +msgstr "" + +msgctxt "MALFORMED_COMMENT_DROPPING" +msgid "dropping a possible comment due to a missing hyphen" +msgstr "" + +msgctxt "MALFORMED_COMMENT_EOS" +msgid "the end of the document was reached before the end of the comment" +msgstr "" + +msgctxt "MALFORMED_COMMENT_WARN" +msgid "detected adjacent hyphens within the comment; consider fix-bad-comments" msgstr "" msgctxt "MALFORMED_DOCTYPE" diff --git a/src/config.c b/src/config.c index 8de74c6..9a27b37 100644 --- a/src/config.c +++ b/src/config.c @@ -241,7 +241,7 @@ static const TidyOptionImpl option_defs[] = { TidyEscapeCdata, MU, "escape-cdata", BL, no, ParsePickList, &boolPicks }, { TidyEscapeScripts, PP, "escape-scripts", BL, yes, ParsePickList, &boolPicks }, /* 20160227 - Issue #348 */ { TidyFixBackslash, MU, "fix-backslash", BL, yes, ParsePickList, &boolPicks }, - { TidyFixComments, MU, "fix-bad-comments", BL, yes, ParsePickList, &boolPicks }, + { TidyFixComments, MU, "fix-bad-comments", BL, no, ParsePickList, &boolPicks }, { TidyFixUri, MU, "fix-uri", BL, yes, ParsePickList, &boolPicks }, { TidyForceOutput, MS, "force-output", BL, no, ParsePickList, &boolPicks }, { TidyGDocClean, MU, "gdoc", BL, no, ParsePickList, &boolPicks }, diff --git a/src/language_en.h b/src/language_en.h index 18db40c..f97bfd4 100755 --- a/src/language_en.h +++ b/src/language_en.h @@ -476,10 +476,12 @@ static languageDefinition 
language_en = { whichPluralForm_en, { "This option specifies if Tidy should replace unexpected hyphens with " "= characters when it comes across adjacent hyphens. " "
" - "The default is yes. " + "The default is no. " "
" - "This option is provided for users of Cold Fusion which uses the " - "comment syntax: <!--- --->. " + "HTML has abandoned SGML comment syntax, and allows adjacent hyphens " + "for all versions of HTML, although XML and XHTML do not. If you plan " + "to support older browsers that require SGML comment syntax, then " + "consider setting this value to yes." }, {/* Important notes for translators: - Use only , , , , and @@ -1886,7 +1888,10 @@ static languageDefinition language_en = { whichPluralForm_en, { { INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" }, { INVALID_XML_ID, 0, "%s cannot copy name attribute to id" }, { JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" }, - { MALFORMED_COMMENT, 0, "adjacent hyphens within comment" }, + { MALFORMED_COMMENT, 0, "tidy replaced adjacent \"-\" with \"=\"" }, + { MALFORMED_COMMENT_DROPPING, 0, "dropping a possible comment due to a missing hyphen" }, + { MALFORMED_COMMENT_EOS, 0, "the end of the document was reached before the end of the comment" }, + { MALFORMED_COMMENT_WARN, 0, "detected adjacent hyphens within the comment; consider fix-bad-comments" }, { MALFORMED_DOCTYPE, 0, "discarding malformed " }, { MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" }, { MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" }, diff --git a/src/language_fr.h b/src/language_fr.h index 0aa4d45..aca77db 100644 --- a/src/language_fr.h +++ b/src/language_fr.h @@ -1,4 +1,4 @@ -#ifndef language_fr_h +#ifndef language_fr_h #define language_fr_h /* * language_fr.h @@ -201,7 +201,7 @@ static languageDefinition language_fr = { whichPluralForm_fr, { { TidyFixComments, 0, "Cette option précise si Tidy doit remplacer les traits d'unions par le caractère = " "s'il rencontre des traits-d'unions adjacents.
La valeur par défaut est yes.
Cette option est fourni pour les utilisateurs de Cold Fusion qui utilisent la syntaxe de " + ">Cette option est fourni pour les utilisateurs de ColdFusion qui utilisent la syntaxe de " "commentaire: <!--- --->. " }, { TidyFixUri, 0, diff --git a/src/lexer.c b/src/lexer.c index 4ff4388..4cc1f35 100755 --- a/src/lexer.c +++ b/src/lexer.c @@ -2613,6 +2613,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) Bool isempty = no; AttVal *attributes = NULL; Node *node; + Bool fixComments = cfgBool(doc, TidyFixComments); /* Lexer->token must be set on return. Nullify it for safety. */ lexer->token = NULL; @@ -2772,7 +2773,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) continue; } - TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); + /* + We only print this message if there's a missing + starting hyphen; this comment will be dropped. + */ + TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING ); } else if (c == 'd' || c == 'D') { @@ -3045,6 +3050,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) continue; c = TY_(ReadChar)(doc->docIn); + + /* Fix hyphens at beginning of tag */ + if ( c != '-' && fixComments && lexer->txtstart - lexer->txtend == 0 ) + { + lexer->lexbuf[lexer->lexsize - 1] = '='; + } + TY_(AddCharToLexer)(lexer, c); if (c != '-') @@ -3056,7 +3068,26 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) if (c == '>') { if (badcomment) - TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); + { + /* + We've got bad comments that we either fixed or + ignored; provide proper user feedback based on + doctype and whether or not we fixed them. + */ + if ( (TY_(HTMLVersion)(doc) & HT50) ) + { + if ( fixComments ) + TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); + /* Otherwise for HTML5, it's safe to ignore. 
*/ + } + else + { + if ( fixComments ) + TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); + else + TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_WARN ); + } + } /* do not store closing -- in lexbuf */ lexer->lexsize -= 2; @@ -3089,7 +3120,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) badcomment++; - if ( cfgBool(doc, TidyFixComments) ) + /* fix hyphens in the middle */ + if ( fixComments ) lexer->lexbuf[lexer->lexsize - 2] = '='; /* if '-' then look for '>' to end the comment */ @@ -3099,8 +3131,9 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) goto end_comment; } - /* otherwise continue to look for --> */ - lexer->lexbuf[lexer->lexsize - 1] = '='; + /* fix hyphens end, and continue to look for --> */ + if ( fixComments ) + lexer->lexbuf[lexer->lexsize - 1] = '='; /* http://tidy.sf.net/bug/1266647 */ TY_(AddCharToLexer)(lexer, c); @@ -3482,7 +3515,10 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) else if (lexer->state == LEX_COMMENT) /* comment */ { if (c == EndOfStream) - TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); + { + /* We print this if we reached end of the stream mid-comment. 
*/ + TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_EOS ); + } lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; diff --git a/src/message.c b/src/message.c index 409f571..dbfd303 100755 --- a/src/message.c +++ b/src/message.c @@ -298,7 +298,10 @@ static struct _dispatchTable { { INVALID_UTF16, TidyWarning, formatEncodingReport }, { INVALID_XML_ID, TidyWarning, formatAttributeReport }, { JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport }, - { MALFORMED_COMMENT, TidyWarning, formatStandard }, + { MALFORMED_COMMENT, TidyInfo, formatStandard }, + { MALFORMED_COMMENT_EOS, TidyError, formatStandard }, + { MALFORMED_COMMENT_DROPPING, TidyWarning, formatStandard }, + { MALFORMED_COMMENT_WARN, TidyWarning, formatStandard }, { MALFORMED_DOCTYPE, TidyWarning, formatStandard }, { MISMATCHED_ATTRIBUTE_ERROR, TidyError, formatAttributeReport }, { MISMATCHED_ATTRIBUTE_WARN, TidyWarning, formatAttributeReport }, @@ -777,6 +780,9 @@ TidyMessageImpl *formatStandard(TidyDocImpl* doc, Node *element, Node *node, uin case DOCTYPE_AFTER_TAGS: case DUPLICATE_FRAMESET: case MALFORMED_COMMENT: + case MALFORMED_COMMENT_DROPPING: + case MALFORMED_COMMENT_EOS: + case MALFORMED_COMMENT_WARN: case MALFORMED_DOCTYPE: case MISSING_DOCTYPE: case MISSING_TITLE_ELEMENT: diff --git a/src/parser.c b/src/parser.c index 168823c..f27e726 100644 --- a/src/parser.c +++ b/src/parser.c @@ -69,7 +69,7 @@ Bool TY_(CheckNodeIntegrity)(Node *node) used to determine how attributes without values should be printed this was introduced to deal with - user defined tags e.g. Cold Fusion + user defined tags e.g. ColdFusion */ Bool TY_(IsNewNode)(Node *node) { diff --git a/src/parser.h b/src/parser.h index 3ad42b8..c8e1b61 100644 --- a/src/parser.h +++ b/src/parser.h @@ -18,7 +18,7 @@ Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node ); used to determine how attributes without values should be printed this was introduced to deal with - user defined tags e.g. 
Cold Fusion + user defined tags e.g. ColdFusion */ Bool TY_(IsNewNode)(Node *node);