Address issue #476, which started out as a simple message update.

- Change default value of `--fix-bad-comments` to `no`.
  - Ensure that when _not_ fixing, nothing is actually fixed.
  - Ensure that when fixing, initial adjacent hyphens actually are fixed.
  - Issue tidyinfo for all fixes made.
  - Issue tidywarning when not making fixes for non-HTML5 doctypes.
This commit is contained in:
Jim Derry 2017-09-24 18:15:40 -04:00
parent cd9d46b53d
commit 483e0fec9e
9 changed files with 85 additions and 21 deletions

View file

@ -210,6 +210,9 @@ extern "C" {
FN(INVALID_XML_ID) \
FN(JOINING_ATTRIBUTE) \
FN(MALFORMED_COMMENT) \
FN(MALFORMED_COMMENT_DROPPING) \
FN(MALFORMED_COMMENT_EOS) \
FN(MALFORMED_COMMENT_WARN) \
FN(MALFORMED_DOCTYPE) \
FN(MISMATCHED_ATTRIBUTE_ERROR) \
FN(MISMATCHED_ATTRIBUTE_WARN) \

View file

@ -5,7 +5,7 @@ msgstr ""
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: HTML Tidy poconvert.rb\n"
"Project-Id-Version: \n"
"POT-Creation-Date: 2017-09-23 07:53:22\n"
"POT-Creation-Date: 2017-09-24 18:12:27\n"
"Last-Translator: jderry\n"
"Language-Team: \n"
@ -466,10 +466,12 @@ msgid ""
"This option specifies if Tidy should replace unexpected hyphens with "
"<code>=</code> characters when it comes across adjacent hyphens. "
"<br/>"
"The default is <var>yes</var>. "
"The default is <var>no</var>. "
"<br/>"
"This option is provided for users of Cold Fusion which uses the "
"comment syntax: <code>&lt;!--- ---&gt;</code>. "
"HTML has abandonded SGML comment syntax, and allows adjacent hypens "
"for all versions of HTML, although XML and XHTML do not. If you plan "
"to support older browsers that require SGML comment syntax, then "
"consider setting this value to <var>yes</var>."
msgstr ""
#. Important notes for translators:
@ -2300,7 +2302,19 @@ msgid "%s joining values of repeated attribute \"%s\""
msgstr ""
msgctxt "MALFORMED_COMMENT"
msgid "adjacent hyphens within comment"
msgid "tidy replaced adjacent \"-\" with \"=\""
msgstr ""
msgctxt "MALFORMED_COMMENT_DROPPING"
msgid "dropping a possible comment due to a missing hyphen"
msgstr ""
msgctxt "MALFORMED_COMMENT_EOS"
msgid "the end of the document was reached before the end of the comment"
msgstr ""
msgctxt "MALFORMED_COMMENT_WARN"
msgid "detected adjacent hyphens within the comment; consider fix-bad-comments"
msgstr ""
msgctxt "MALFORMED_DOCTYPE"

View file

@ -241,7 +241,7 @@ static const TidyOptionImpl option_defs[] =
{ TidyEscapeCdata, MU, "escape-cdata", BL, no, ParsePickList, &boolPicks },
{ TidyEscapeScripts, PP, "escape-scripts", BL, yes, ParsePickList, &boolPicks }, /* 20160227 - Issue #348 */
{ TidyFixBackslash, MU, "fix-backslash", BL, yes, ParsePickList, &boolPicks },
{ TidyFixComments, MU, "fix-bad-comments", BL, yes, ParsePickList, &boolPicks },
{ TidyFixComments, MU, "fix-bad-comments", BL, no, ParsePickList, &boolPicks },
{ TidyFixUri, MU, "fix-uri", BL, yes, ParsePickList, &boolPicks },
{ TidyForceOutput, MS, "force-output", BL, no, ParsePickList, &boolPicks },
{ TidyGDocClean, MU, "gdoc", BL, no, ParsePickList, &boolPicks },

View file

@ -476,10 +476,12 @@ static languageDefinition language_en = { whichPluralForm_en, {
"This option specifies if Tidy should replace unexpected hyphens with "
"<code>=</code> characters when it comes across adjacent hyphens. "
"<br/>"
"The default is <var>yes</var>. "
"The default is <var>no</var>. "
"<br/>"
"This option is provided for users of Cold Fusion which uses the "
"comment syntax: <code>&lt;!--- ---&gt;</code>. "
"HTML has abandonded SGML comment syntax, and allows adjacent hypens "
"for all versions of HTML, although XML and XHTML do not. If you plan "
"to support older browsers that require SGML comment syntax, then "
"consider setting this value to <var>yes</var>."
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
@ -1886,7 +1888,10 @@ static languageDefinition language_en = { whichPluralForm_en, {
{ INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" },
{ INVALID_XML_ID, 0, "%s cannot copy name attribute to id" },
{ JOINING_ATTRIBUTE, 0, "%s joining values of repeated attribute \"%s\"" },
{ MALFORMED_COMMENT, 0, "adjacent hyphens within comment" },
{ MALFORMED_COMMENT, 0, "tidy replaced adjacent \"-\" with \"=\"" },
{ MALFORMED_COMMENT_DROPPING, 0, "dropping a possible comment due to a missing hyphen" },
{ MALFORMED_COMMENT_EOS, 0, "the end of the document was reached before the end of the comment" },
{ MALFORMED_COMMENT_WARN, 0, "detected adjacent hyphens within the comment; consider fix-bad-comments" },
{ MALFORMED_DOCTYPE, 0, "discarding malformed <!DOCTYPE>" },
{ MISMATCHED_ATTRIBUTE_ERROR, 0, "%s attribute \"%s\" not allowed for %s" },
{ MISMATCHED_ATTRIBUTE_WARN, 0, "%s attribute \"%s\" not allowed for %s" },

View file

@ -1,4 +1,4 @@
#ifndef language_fr_h
#ifndef language_fr_h
#define language_fr_h
/*
* language_fr.h
@ -201,7 +201,7 @@ static languageDefinition language_fr = { whichPluralForm_fr, {
{ TidyFixComments, 0,
"Cette option précise si Tidy doit remplacer les traits d'unions par le caractère <code>=</code> "
"s'il rencontre des traits-d'unions adjacents. <br/>La valeur par défaut est <var>yes</var>. <br/"
">Cette option est fourni pour les utilisateurs de Cold Fusion qui utilisent la syntaxe de "
">Cette option est fourni pour les utilisateurs de ColdFusion qui utilisent la syntaxe de "
"commentaire: <code>&lt;!--- ---&gt;</code>. "
},
{ TidyFixUri, 0,

View file

@ -2613,6 +2613,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
Bool isempty = no;
AttVal *attributes = NULL;
Node *node;
Bool fixComments = cfgBool(doc, TidyFixComments);
/* Lexer->token must be set on return. Nullify it for safety. */
lexer->token = NULL;
@ -2772,7 +2773,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
continue;
}
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
/*
We only print this message if there's a missing
starting hyphen; this comment will be dropped.
*/
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING );
}
else if (c == 'd' || c == 'D')
{
@ -3045,6 +3050,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
continue;
c = TY_(ReadChar)(doc->docIn);
/* Fix hyphens at beginning of tag */
if ( c != '-' && fixComments && lexer->txtstart - lexer->txtend == 0 )
{
lexer->lexbuf[lexer->lexsize - 1] = '=';
}
TY_(AddCharToLexer)(lexer, c);
if (c != '-')
@ -3056,7 +3068,26 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
if (c == '>')
{
if (badcomment)
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
{
/*
We've got bad comments that we either fixed or
ignored; provide proper user feedback based on
doctype and whether or not we fixed them.
*/
if ( (TY_(HTMLVersion)(doc) & HT50) )
{
if ( fixComments )
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
/* Otherwise for HTML5, it's safe to ignore. */
}
else
{
if ( fixComments )
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
else
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_WARN );
}
}
/* do not store closing -- in lexbuf */
lexer->lexsize -= 2;
@ -3089,7 +3120,8 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
badcomment++;
if ( cfgBool(doc, TidyFixComments) )
/* fix hyphens in the middle */
if ( fixComments )
lexer->lexbuf[lexer->lexsize - 2] = '=';
/* if '-' then look for '>' to end the comment */
@ -3099,8 +3131,9 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
goto end_comment;
}
/* otherwise continue to look for --> */
lexer->lexbuf[lexer->lexsize - 1] = '=';
/* fix hyphens at the end, and continue to look for --> */
if ( fixComments )
lexer->lexbuf[lexer->lexsize - 1] = '=';
/* http://tidy.sf.net/bug/1266647 */
TY_(AddCharToLexer)(lexer, c);
@ -3482,7 +3515,10 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
else if (lexer->state == LEX_COMMENT) /* comment */
{
if (c == EndOfStream)
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT );
{
/* We print this if we reached end of the stream mid-comment. */
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_EOS );
}
lexer->txtend = lexer->lexsize;
lexer->lexbuf[lexer->lexsize] = '\0';

View file

@ -298,7 +298,10 @@ static struct _dispatchTable {
{ INVALID_UTF16, TidyWarning, formatEncodingReport },
{ INVALID_XML_ID, TidyWarning, formatAttributeReport },
{ JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ MALFORMED_COMMENT, TidyWarning, formatStandard },
{ MALFORMED_COMMENT, TidyInfo, formatStandard },
{ MALFORMED_COMMENT_EOS, TidyError, formatStandard },
{ MALFORMED_COMMENT_DROPPING, TidyWarning, formatStandard },
{ MALFORMED_COMMENT_WARN, TidyWarning, formatStandard },
{ MALFORMED_DOCTYPE, TidyWarning, formatStandard },
{ MISMATCHED_ATTRIBUTE_ERROR, TidyError, formatAttributeReport },
{ MISMATCHED_ATTRIBUTE_WARN, TidyWarning, formatAttributeReport },
@ -777,6 +780,9 @@ TidyMessageImpl *formatStandard(TidyDocImpl* doc, Node *element, Node *node, uin
case DOCTYPE_AFTER_TAGS:
case DUPLICATE_FRAMESET:
case MALFORMED_COMMENT:
case MALFORMED_COMMENT_DROPPING:
case MALFORMED_COMMENT_EOS:
case MALFORMED_COMMENT_WARN:
case MALFORMED_DOCTYPE:
case MISSING_DOCTYPE:
case MISSING_TITLE_ELEMENT:

View file

@ -69,7 +69,7 @@ Bool TY_(CheckNodeIntegrity)(Node *node)
used to determine how attributes
without values should be printed
this was introduced to deal with
user defined tags e.g. Cold Fusion
user defined tags e.g. ColdFusion
*/
Bool TY_(IsNewNode)(Node *node)
{

View file

@ -18,7 +18,7 @@ Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
used to determine how attributes
without values should be printed
this was introduced to deal with
user defined tags e.g. Cold Fusion
user defined tags e.g. ColdFusion
*/
Bool TY_(IsNewNode)(Node *node);