Address issue #476, which started out as a simple message update.

- Change default value of `--fix-bad-comments` to `no`.
  - Ensure that when _not_ fixing, nothing is actually fixed.
  - Ensure that when fixing, initial adjacent hyphens actually are fixed.
  - Issue tidyinfo for all fixes made.
  - Issue tidywarning when not making fixes for non-HTML5 doctypes.
This commit is contained in:
Jim Derry 2017-09-24 18:15:40 -04:00
parent cd9d46b53d
commit 483e0fec9e
9 changed files with 85 additions and 21 deletions

View file

@ -210,6 +210,9 @@ extern "C" {
FN(INVALID_XML_ID) \ FN(INVALID_XML_ID) \
FN(JOINING_ATTRIBUTE) \ FN(JOINING_ATTRIBUTE) \
FN(MALFORMED_COMMENT) \ FN(MALFORMED_COMMENT) \
FN(MALFORMED_COMMENT_DROPPING) \
FN(MALFORMED_COMMENT_EOS) \
FN(MALFORMED_COMMENT_WARN) \
FN(MALFORMED_DOCTYPE) \ FN(MALFORMED_DOCTYPE) \
FN(MISMATCHED_ATTRIBUTE_ERROR) \ FN(MISMATCHED_ATTRIBUTE_ERROR) \
FN(MISMATCHED_ATTRIBUTE_WARN) \ FN(MISMATCHED_ATTRIBUTE_WARN) \

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n" "Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n" "X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n" "Project-Id-Version: \n"
"POT-Creation-Date: 2017-09-23 07:53:22\n" "POT-Creation-Date: 2017-09-24 18:12:27\n"
"Last-Translator: jderry\n" "Last-Translator: jderry\n"
"Language-Team: \n" "Language-Team: \n"
@ -466,10 +466,12 @@ msgid ""
"This option specifies if Tidy should replace unexpected hyphens with " "This option specifies if Tidy should replace unexpected hyphens with "
"<code>=</code> characters when it comes across adjacent hyphens. " "<code>=</code> characters when it comes across adjacent hyphens. "
"<br/>" "<br/>"
"The default is <var>yes</var>. " "The default is <var>no</var>. "
"<br/>" "<br/>"
"This option is provided for users of Cold Fusion which uses the " "HTML has abandonded SGML comment syntax, and allows adjacent hypens "
"comment syntax: <code>&lt;!--- ---&gt;</code>. " "for all versions of HTML, although XML and XHTML do not. If you plan "
"to support older browsers that require SGML comment syntax, then "
"consider setting this value to <var>yes</var>."
msgstr "" msgstr ""
#. Important notes for translators: #. Important notes for translators:
@ -2300,7 +2302,19 @@ msgid "%s joining values of repeated attribute \"%s\""
msgstr "" msgstr ""
msgctxt "MALFORMED_COMMENT" msgctxt "MALFORMED_COMMENT"
msgid "adjacent hyphens within comment" msgid "tidy replaced adjacent \"-\" with \"=\""
msgstr ""
msgctxt "MALFORMED_COMMENT_DROPPING"
msgid "dropping a possible comment due to a missing hyphen"
msgstr ""
msgctxt "MALFORMED_COMMENT_EOS"
msgid "the end of the document was reached before the end of the comment"
msgstr ""
msgctxt "MALFORMED_COMMENT_WARN"
msgid "detected adjacent hyphens within the comment; consider fix-bad-comments"
msgstr "" msgstr ""
msgctxt "MALFORMED_DOCTYPE" msgctxt "MALFORMED_DOCTYPE"

View file

@ -241,7 +241,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyEscapeCdata, MU, "escape-cdata", BL, no, ParsePickList, &boolPicks }, { TidyEscapeCdata, MU, "escape-cdata", BL, no, ParsePickList, &boolPicks },
{ TidyEscapeScripts, PP, "escape-scripts", BL, yes, ParsePickList, &boolPicks }, /* 20160227 - Issue #348 */ { TidyEscapeScripts, PP, "escape-scripts", BL, yes, ParsePickList, &boolPicks }, /* 20160227 - Issue #348 */
{ TidyFixBackslash, MU, "fix-backslash", BL, yes, ParsePickList, &boolPicks }, { TidyFixBackslash, MU, "fix-backslash", BL, yes, ParsePickList, &boolPicks },
{ TidyFixComments, MU, "fix-bad-comments", BL, yes, ParsePickList, &boolPicks }, { TidyFixComments, MU, "fix-bad-comments", BL, no, ParsePickList, &boolPicks },
{ TidyFixUri, MU, "fix-uri", BL, yes, ParsePickList, &boolPicks }, { TidyFixUri, MU, "fix-uri", BL, yes, ParsePickList, &boolPicks },
{ TidyForceOutput, MS, "force-output", BL, no, ParsePickList, &boolPicks }, { TidyForceOutput, MS, "force-output", BL, no, ParsePickList, &boolPicks },
{ TidyGDocClean, MU, "gdoc", BL, no, ParsePickList, &boolPicks }, { TidyGDocClean, MU, "gdoc", BL, no, ParsePickList, &boolPicks },

View file

@ -476,10 +476,12 @@ static languageDefinition language_en = { whichPluralForm_en, {
"This option specifies if Tidy should replace unexpected hyphens with " "This option specifies if Tidy should replace unexpected hyphens with "
"<code>=</code> characters when it comes across adjacent hyphens. " "<code>=</code> characters when it comes across adjacent hyphens. "
"<br/>" "<br/>"
"The default is <var>yes</var>. " "The default is <var>no</var>. "
"<br/>" "<br/>"
"This option is provided for users of Cold Fusion which uses the " "HTML has abandonded SGML comment syntax, and allows adjacent hypens "
"comment syntax: <code>&lt;!--- ---&gt;</code>. " "for all versions of HTML, although XML and XHTML do not. If you plan "
"to support older browsers that require SGML comment syntax, then "
"consider setting this value to <var>yes</var>."
}, },
{/* Important notes for translators: {/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and - Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
@ -1886,7 +1888,10 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" }, { INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" },
{ INVALID_XML_ID, 0, "%s cannot copy name attribute to id" }, { INVALID_XML_ID, 0, "%s cannot copy name attribute to id" },
{ JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" }, { JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" },
{ MALFORMED_COMMENT, 0, "adjacent hyphens within comment" }, { MALFORMED_COMMENT, 0, "tidy replaced adjacent \"-\" with \"=\"" },
{ MALFORMED_COMMENT_DROPPING, 0, "dropping a possible comment due to a missing hyphen" },
{ MALFORMED_COMMENT_EOS, 0, "the end of the document was reached before the end of the comment" },
{ MALFORMED_COMMENT_WARN, 0, "detected adjacent hyphens within the comment; consider fix-bad-comments" },
{ MALFORMED_DOCTYPE, 0, "discarding malformed <!DOCTYPE>" }, { MALFORMED_DOCTYPE, 0, "discarding malformed <!DOCTYPE>" },
{ MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" }, { MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" },
{ MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" }, { MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" },

View file

@ -1,4 +1,4 @@
#ifndef language_fr_h #ifndef language_fr_h
#define language_fr_h #define language_fr_h
/* /*
* language_fr.h * language_fr.h
@ -201,7 +201,7 @@ static languageDefinition language_fr = { whichPluralForm_fr, {
{ TidyFixComments, 0, { TidyFixComments, 0,
"Cette option précise si Tidy doit remplacer les traits d'unions par le caractère <code>=</code> " "Cette option précise si Tidy doit remplacer les traits d'unions par le caractère <code>=</code> "
"s'il rencontre des traits-d'unions adjacents. <br/>La valeur par défaut est <var>yes</var>. <br/" "s'il rencontre des traits-d'unions adjacents. <br/>La valeur par défaut est <var>yes</var>. <br/"
">Cette option est fourni pour les utilisateurs de Cold Fusion qui utilisent la syntaxe de " ">Cette option est fourni pour les utilisateurs de ColdFusion qui utilisent la syntaxe de "
"commentaire: <code>&lt;!--- ---&gt;</code>. " "commentaire: <code>&lt;!--- ---&gt;</code>. "
}, },
{ TidyFixUri, 0, { TidyFixUri, 0,

View file

@ -2613,6 +2613,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
Bool isempty = no; Bool isempty = no;
AttVal *attributes = NULL; AttVal *attributes = NULL;
Node *node; Node *node;
Bool fixComments = cfgBool(doc, TidyFixComments);
/* Lexer->token must be set on return. Nullify it for safety. */ /* Lexer->token must be set on return. Nullify it for safety. */
lexer->token = NULL; lexer->token = NULL;
@ -2772,7 +2773,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
continue; continue;
} }
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); /*
We only print this message if there's a missing
starting hyphen; this comment will be dropped.
*/
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING );
} }
else if (c == 'd' || c == 'D') else if (c == 'd' || c == 'D')
{ {
@ -3045,6 +3050,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
continue; continue;
c = TY_(ReadChar)(doc->docIn); c = TY_(ReadChar)(doc->docIn);
/* Fix hyphens at beginning of tag */
if ( c != '-' && fixComments && lexer->txtstart - lexer->txtend == 0 )
{
lexer->lexbuf[lexer->lexsize - 1] = '=';
}
TY_(AddCharToLexer)(lexer, c); TY_(AddCharToLexer)(lexer, c);
if (c != '-') if (c != '-')
@ -3056,7 +3068,26 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
if (c == '>') if (c == '>')
{ {
if (badcomment) if (badcomment)
{
/*
We've got bad comments that we either fixed or
ignored; provide proper user feedback based on
doctype and whether or not we fixed them.
*/
if ( (TY_(HTMLVersion)(doc) & HT50) )
{
if ( fixComments )
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
/* Otherwise for HTML5, it's safe to ignore. */
}
else
{
if ( fixComments )
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
else
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_WARN );
}
}
/* do not store closing -- in lexbuf */ /* do not store closing -- in lexbuf */
lexer->lexsize -= 2; lexer->lexsize -= 2;
@ -3089,7 +3120,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
badcomment++; badcomment++;
if ( cfgBool(doc, TidyFixComments) ) /* fix hyphens in the middle */
if ( fixComments )
lexer->lexbuf[lexer->lexsize - 2] = '='; lexer->lexbuf[lexer->lexsize - 2] = '=';
/* if '-' then look for '>' to end the comment */ /* if '-' then look for '>' to end the comment */
@ -3099,7 +3131,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
goto end_comment; goto end_comment;
} }
/* otherwise continue to look for --> */ /* fix hyphens end, and continue to look for --> */
if ( fixComments )
lexer->lexbuf[lexer->lexsize - 1] = '='; lexer->lexbuf[lexer->lexsize - 1] = '=';
/* http://tidy.sf.net/bug/1266647 */ /* http://tidy.sf.net/bug/1266647 */
@ -3482,7 +3515,10 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
else if (lexer->state == LEX_COMMENT) /* comment */ else if (lexer->state == LEX_COMMENT) /* comment */
{ {
if (c == EndOfStream) if (c == EndOfStream)
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT ); {
/* We print this if we reached end of the stream mid-comment. */
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_EOS );
}
lexer->txtend = lexer->lexsize; lexer->txtend = lexer->lexsize;
lexer->lexbuf[lexer->lexsize] = '\0'; lexer->lexbuf[lexer->lexsize] = '\0';

View file

@ -298,7 +298,10 @@ static struct _dispatchTable {
{ INVALID_UTF16, TidyWarning, formatEncodingReport }, { INVALID_UTF16, TidyWarning, formatEncodingReport },
{ INVALID_XML_ID, TidyWarning, formatAttributeReport }, { INVALID_XML_ID, TidyWarning, formatAttributeReport },
{ JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport }, { JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ MALFORMED_COMMENT, TidyWarning, formatStandard }, { MALFORMED_COMMENT, TidyInfo, formatStandard },
{ MALFORMED_COMMENT_EOS, TidyError, formatStandard },
{ MALFORMED_COMMENT_DROPPING, TidyWarning, formatStandard },
{ MALFORMED_COMMENT_WARN, TidyWarning, formatStandard },
{ MALFORMED_DOCTYPE, TidyWarning, formatStandard }, { MALFORMED_DOCTYPE, TidyWarning, formatStandard },
{ MISMATCHED_ATTRIBUTE_ERROR, TidyError, formatAttributeReport }, { MISMATCHED_ATTRIBUTE_ERROR, TidyError, formatAttributeReport },
{ MISMATCHED_ATTRIBUTE_WARN, TidyWarning, formatAttributeReport }, { MISMATCHED_ATTRIBUTE_WARN, TidyWarning, formatAttributeReport },
@ -777,6 +780,9 @@ TidyMessageImpl *formatStandard(TidyDocImpl* doc, Node *element, Node *node, uin
case DOCTYPE_AFTER_TAGS: case DOCTYPE_AFTER_TAGS:
case DUPLICATE_FRAMESET: case DUPLICATE_FRAMESET:
case MALFORMED_COMMENT: case MALFORMED_COMMENT:
case MALFORMED_COMMENT_DROPPING:
case MALFORMED_COMMENT_EOS:
case MALFORMED_COMMENT_WARN:
case MALFORMED_DOCTYPE: case MALFORMED_DOCTYPE:
case MISSING_DOCTYPE: case MISSING_DOCTYPE:
case MISSING_TITLE_ELEMENT: case MISSING_TITLE_ELEMENT:

View file

@ -69,7 +69,7 @@ Bool TY_(CheckNodeIntegrity)(Node *node)
used to determine how attributes used to determine how attributes
without values should be printed without values should be printed
this was introduced to deal with this was introduced to deal with
user defined tags e.g. Cold Fusion user defined tags e.g. ColdFusion
*/ */
Bool TY_(IsNewNode)(Node *node) Bool TY_(IsNewNode)(Node *node)
{ {

View file

@ -18,7 +18,7 @@ Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
used to determine how attributes used to determine how attributes
without values should be printed without values should be printed
this was introduced to deal with this was introduced to deal with
user defined tags e.g. Cold Fusion user defined tags e.g. ColdFusion
*/ */
Bool TY_(IsNewNode)(Node *node); Bool TY_(IsNewNode)(Node *node);