Address issue #476, which started out as a simple message update.

- Change default value of `--fix-bad-comments` to `no`.
  - Ensure that when _not_ fixing, nothing is actually fixed.
  - Ensure that when fixing, initial adjacent hyphens actually are fixed.
  - Issue tidyinfo for all fixes made.
  - Issue tidywarning when not making fixes for non-HTML5 doctypes.
This commit is contained in:
Jim Derry 2017-09-24 18:15:40 -04:00
parent cd9d46b53d
commit 483e0fec9e
9 changed files with 85 additions and 21 deletions

View file

@ -210,6 +210,9 @@ extern "C" {
FN(INVALID_XML_ID) \ FN(INVALID_XML_ID) \
FN(JOINING_ATTRIBUTE) \ FN(JOINING_ATTRIBUTE) \
FN(MALFORMED_COMMENT) \ FN(MALFORMED_COMMENT) \
FN(MALFORMED_COMMENT_DROPPING) \
FN(MALFORMED_COMMENT_EOS) \
FN(MALFORMED_COMMENT_WARN) \
FN(MALFORMED_DOCTYPE) \ FN(MALFORMED_DOCTYPE) \
FN(MISMATCHED_ATTRIBUTE_ERROR) \ FN(MISMATCHED_ATTRIBUTE_ERROR) \
FN(MISMATCHED_ATTRIBUTE_WARN) \ FN(MISMATCHED_ATTRIBUTE_WARN) \

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n" "Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n" "X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n" "Project-Id-Version: \n"
"POT-Creation-Date: 2017-09-23 07:53:22\n" "POT-Creation-Date: 2017-09-24 18:12:27\n"
"Last-Translator: jderry\n" "Last-Translator: jderry\n"
"Language-Team: \n" "Language-Team: \n"
@ -466,10 +466,12 @@ msgid ""
"This option specifies if Tidy should replace unexpected hyphens with " "This option specifies if Tidy should replace unexpected hyphens with "
"<code>=</code> characters when it comes across adjacent hyphens. " "<code>=</code> characters when it comes across adjacent hyphens. "
"<br/>" "<br/>"
"The default is <var>yes</var>. " "The default is <var>no</var>. "
"<br/>" "<br/>"
"This option is provided for users of Cold Fusion which uses the " "HTML has abandoned SGML comment syntax, and allows adjacent hyphens "
"comment syntax: <code>&lt;!--- ---&gt;</code>. " "for all versions of HTML, although XML and XHTML do not. If you plan "
"to support older browsers that require SGML comment syntax, then "
"consider setting this value to <var>yes</var>."
msgstr "" msgstr ""
#. Important notes for translators: #. Important notes for translators:
@ -2300,7 +2302,19 @@ msgid "%s joining values of repeated attribute \"%s\""
msgstr "" msgstr ""
msgctxt "MALFORMED_COMMENT" msgctxt "MALFORMED_COMMENT"
msgid "adjacent hyphens within comment" msgid "tidy replaced adjacent \"-\" with \"=\""
msgstr ""
msgctxt "MALFORMED_COMMENT_DROPPING"
msgid "dropping a possible comment due to a missing hyphen"
msgstr ""
msgctxt "MALFORMED_COMMENT_EOS"
msgid "the end of the document was reached before the end of the comment"
msgstr ""
msgctxt "MALFORMED_COMMENT_WARN"
msgid "detected adjacent hyphens within the comment; consider fix-bad-comments"
msgstr "" msgstr ""
msgctxt "MALFORMED_DOCTYPE" msgctxt "MALFORMED_DOCTYPE"

View file

@ -241,7 +241,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyEscapeCdata, MU, "escape-cdata", BL, no, ParsePickList, &boolPicks }, { TidyEscapeCdata, MU, "escape-cdata", BL, no, ParsePickList, &boolPicks },
{ TidyEscapeScripts, PP, "escape-scripts", BL, yes, ParsePickList, &boolPicks }, /* 20160227 - Issue #348 */ { TidyEscapeScripts, PP, "escape-scripts", BL, yes, ParsePickList, &boolPicks }, /* 20160227 - Issue #348 */
{ TidyFixBackslash, MU, "fix-backslash", BL, yes, ParsePickList, &boolPicks }, { TidyFixBackslash, MU, "fix-backslash", BL, yes, ParsePickList, &boolPicks },
{ TidyFixComments, MU, "fix-bad-comments", BL, yes, ParsePickList, &boolPicks }, { TidyFixComments, MU, "fix-bad-comments", BL, no, ParsePickList, &boolPicks },
{ TidyFixUri, MU, "fix-uri", BL, yes, ParsePickList, &boolPicks }, { TidyFixUri, MU, "fix-uri", BL, yes, ParsePickList, &boolPicks },
{ TidyForceOutput, MS, "force-output", BL, no, ParsePickList, &boolPicks }, { TidyForceOutput, MS, "force-output", BL, no, ParsePickList, &boolPicks },
{ TidyGDocClean, MU, "gdoc", BL, no, ParsePickList, &boolPicks }, { TidyGDocClean, MU, "gdoc", BL, no, ParsePickList, &boolPicks },

View file

@ -476,10 +476,12 @@ static languageDefinition language_en = { whichPluralForm_en, {
"This option specifies if Tidy should replace unexpected hyphens with " "This option specifies if Tidy should replace unexpected hyphens with "
"<code>=</code> characters when it comes across adjacent hyphens. " "<code>=</code> characters when it comes across adjacent hyphens. "
"<br/>" "<br/>"
"The default is <var>yes</var>. " "The default is <var>no</var>. "
"<br/>" "<br/>"
"This option is provided for users of Cold Fusion which uses the " "HTML has abandoned SGML comment syntax, and allows adjacent hyphens "
"comment syntax: <code>&lt;!--- ---&gt;</code>. " "for all versions of HTML, although XML and XHTML do not. If you plan "
"to support older browsers that require SGML comment syntax, then "
"consider setting this value to <var>yes</var>."
}, },
{/* Important notes for translators: {/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
@ -1886,7 +1888,10 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" }, { INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" },
{ INVALID_XML_ID, 0, "%s cannot copy name attribute to id" }, { INVALID_XML_ID, 0, "%s cannot copy name attribute to id" },
{ JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" }, { JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" },
{ MALFORMED_COMMENT, 0, "adjacent hyphens within comment" }, { MALFORMED_COMMENT, 0, "tidy replaced adjacent \"-\" with \"=\"" },
{ MALFORMED_COMMENT_DROPPING, 0, "dropping a possible comment due to a missing hyphen" },
{ MALFORMED_COMMENT_EOS, 0, "the end of the document was reached before the end of the comment" },
{ MALFORMED_COMMENT_WARN, 0, "detected adjacent hyphens within the comment; consider fix-bad-comments" },
{ MALFORMED_DOCTYPE, 0, "discarding malformed <!DOCTYPE>" }, { MALFORMED_DOCTYPE, 0, "discarding malformed <!DOCTYPE>" },
{ MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" }, { MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" },
{ MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" }, { MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" },

View file

@ -1,4 +1,4 @@
#ifndef language_fr_h #ifndef language_fr_h
#define language_fr_h #define language_fr_h
/* /*
* language_fr.h * language_fr.h

View file

@ -2613,6 +2613,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
Bool isempty = no; Bool isempty = no;
AttVal *attributes = NULL; AttVal *attributes = NULL;
Node *node; Node *node;
Bool fixComments = cfgBool(doc, TidyFixComments);
/* Lexer->token must be set on return. Nullify it for safety. */ /* Lexer->token must be set on return. Nullify it for safety. */
lexer->token = NULL; lexer->token = NULL;
@ -2772,7 +2773,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
continue; continue;
} }
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); /*
We only print this message if there's a missing
starting hyphen; this comment will be dropped.
*/
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING );
} }
else if (c == 'd' || c == 'D') else if (c == 'd' || c == 'D')
{ {
@ -3045,6 +3050,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
continue; continue;
c = TY_(ReadChar)(doc->docIn); c = TY_(ReadChar)(doc->docIn);
/* Fix hyphens at beginning of tag */
if ( c != '-' && fixComments && lexer->txtstart - lexer->txtend == 0 )
{
lexer->lexbuf[lexer->lexsize - 1] = '=';
}
TY_(AddCharToLexer)(lexer, c); TY_(AddCharToLexer)(lexer, c);
if (c != '-') if (c != '-')
@ -3056,7 +3068,26 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
if (c == '>') if (c == '>')
{ {
if (badcomment) if (badcomment)
{
/*
We've got bad comments that we either fixed or
ignored; provide proper user feedback based on
doctype and whether or not we fixed them.
*/
if ( (TY_(HTMLVersion)(doc) & HT50) )
{
if ( fixComments )
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
/* Otherwise for HTML5, it's safe to ignore. */
}
else
{
if ( fixComments )
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
else
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_WARN );
}
}
/* do not store closing -- in lexbuf */ /* do not store closing -- in lexbuf */
lexer->lexsize -= 2; lexer->lexsize -= 2;
@ -3089,7 +3120,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
badcomment++; badcomment++;
if ( cfgBool(doc, TidyFixComments) ) /* fix hyphens in the middle */
if ( fixComments )
lexer->lexbuf[lexer->lexsize - 2] = '='; lexer->lexbuf[lexer->lexsize - 2] = '=';
/* if '-' then look for '>' to end the comment */ /* if '-' then look for '>' to end the comment */
@ -3099,7 +3131,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
goto end_comment; goto end_comment;
} }
/* otherwise continue to look for --> */ /* fix hyphens end, and continue to look for --> */
if ( fixComments )
lexer->lexbuf[lexer->lexsize - 1] = '='; lexer->lexbuf[lexer->lexsize - 1] = '=';
/* http://tidy.sf.net/bug/1266647 */ /* http://tidy.sf.net/bug/1266647 */
@ -3482,7 +3515,10 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
else if (lexer->state == LEX_COMMENT) /* comment */ else if (lexer->state == LEX_COMMENT) /* comment */
{ {
if (c == EndOfStream) if (c == EndOfStream)
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); {
/* We print this if we reached end of the stream mid-comment. */
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_EOS );
}
lexer->txtend = lexer->lexsize; lexer->txtend = lexer->lexsize;
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';

View file

@ -298,7 +298,10 @@ static struct _dispatchTable {
{ INVALID_UTF16, TidyWarning, formatEncodingReport }, { INVALID_UTF16, TidyWarning, formatEncodingReport },
{ INVALID_XML_ID, TidyWarning, formatAttributeReport }, { INVALID_XML_ID, TidyWarning, formatAttributeReport },
{ JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport }, { JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ MALFORMED_COMMENT, TidyWarning, formatStandard }, { MALFORMED_COMMENT, TidyInfo, formatStandard },
{ MALFORMED_COMMENT_EOS, TidyError, formatStandard },
{ MALFORMED_COMMENT_DROPPING, TidyWarning, formatStandard },
{ MALFORMED_COMMENT_WARN, TidyWarning, formatStandard },
{ MALFORMED_DOCTYPE, TidyWarning, formatStandard }, { MALFORMED_DOCTYPE, TidyWarning, formatStandard },
{ MISMATCHED_ATTRIBUTE_ERROR, TidyError, formatAttributeReport }, { MISMATCHED_ATTRIBUTE_ERROR, TidyError, formatAttributeReport },
{ MISMATCHED_ATTRIBUTE_WARN, TidyWarning, formatAttributeReport }, { MISMATCHED_ATTRIBUTE_WARN, TidyWarning, formatAttributeReport },
@ -777,6 +780,9 @@ TidyMessageImpl *formatStandard(TidyDocImpl* doc, Node *element, Node *node, uin
case DOCTYPE_AFTER_TAGS: case DOCTYPE_AFTER_TAGS:
case DUPLICATE_FRAMESET: case DUPLICATE_FRAMESET:
case MALFORMED_COMMENT: case MALFORMED_COMMENT:
case MALFORMED_COMMENT_DROPPING:
case MALFORMED_COMMENT_EOS:
case MALFORMED_COMMENT_WARN:
case MALFORMED_DOCTYPE: case MALFORMED_DOCTYPE:
case MISSING_DOCTYPE: case MISSING_DOCTYPE:
case MISSING_TITLE_ELEMENT: case MISSING_TITLE_ELEMENT: