* Is. #896 - make 'bare' docs match code
* Is. #487 #462 add warn msg and do not get stuck until eof
The warning message could perhaps be better worded, and maybe there should be another msg when a '>' is encountered while looking for a ']' in an MS Word section, and perhaps the section should be discarded... And perhaps it should be an error, to force the user to fix... But the fix is good as it is, and these issues can be dealt with later... And this fix is piggybacked on this PR, but it is likewise related to the 'word-2000' option...
This commit is contained in:
parent
7cda3aba38
commit
c6e0ccce1f
|
@ -610,7 +610,7 @@ typedef enum
|
||||||
TidyLiteralAttribs, /**< If true attributes may use newlines */
|
TidyLiteralAttribs, /**< If true attributes may use newlines */
|
||||||
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
|
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
|
||||||
TidyLowerLiterals, /**< Folds known attribute values to lower case */
|
TidyLowerLiterals, /**< Folds known attribute values to lower case */
|
||||||
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
|
TidyMakeBare, /**< Replace smart quotes, em dashes, etc with ASCII */
|
||||||
TidyMakeClean, /**< Replace presentational clutter by style rules */
|
TidyMakeClean, /**< Replace presentational clutter by style rules */
|
||||||
TidyMark, /**< Add meta element indicating tidied doc */
|
TidyMark, /**< Add meta element indicating tidied doc */
|
||||||
TidyMergeDivs, /**< Merge multiple DIVs */
|
TidyMergeDivs, /**< Merge multiple DIVs */
|
||||||
|
|
|
@ -1893,8 +1893,7 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
|
||||||
if ( nodeIsHTML(node) )
|
if ( nodeIsHTML(node) )
|
||||||
{
|
{
|
||||||
/* check that it's a Word 2000 document */
|
/* check that it's a Word 2000 document */
|
||||||
if ( !TY_(GetAttrByName)(node, "xmlns:o") &&
|
if ( !TY_(IsWord2000) (doc) ) /* Is. #896 */
|
||||||
!cfgBool(doc, TidyMakeBare) )
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Output proprietary attributes to maintain errout compatibility
|
/* Output proprietary attributes to maintain errout compatibility
|
||||||
|
|
|
@ -786,9 +786,9 @@ static languageDefinition language_en = { whichPluralForm_en, {
|
||||||
- The strings "Tidy" and "HTML Tidy" are the program name and must not
|
- The strings "Tidy" and "HTML Tidy" are the program name and must not
|
||||||
be translated. */
|
be translated. */
|
||||||
TidyMakeBare, 0,
|
TidyMakeBare, 0,
|
||||||
"This option specifies if Tidy should strip Microsoft specific HTML "
|
"This option specifies if Tidy should replace smart quotes and em dashes with "
|
||||||
"from Word 2000 documents, and output spaces rather than non-breaking "
|
"ASCII, and output spaces rather than non-breaking "
|
||||||
"spaces where they exist in the input. "
|
"spaces, where they exist in the input. "
|
||||||
},
|
},
|
||||||
{/* Important notes for translators:
|
{/* Important notes for translators:
|
||||||
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
|
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
|
||||||
|
|
|
@ -2777,6 +2777,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING); /* Is. #487 */
|
||||||
|
|
||||||
/* else swallow characters up to and including next '>' */
|
/* else swallow characters up to and including next '>' */
|
||||||
while ((c = TY_(ReadChar)(doc->docIn)) != '>')
|
while ((c = TY_(ReadChar)(doc->docIn)) != '>')
|
||||||
|
@ -3340,7 +3341,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c != ']')
|
if (c == '>')
|
||||||
|
{
|
||||||
|
/* Is. #462 - reached '>' before ']' */
|
||||||
|
TY_(UngetChar)(c, doc->docIn);
|
||||||
|
} else if (c != ']')
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* now look for '>' */
|
/* now look for '>' */
|
||||||
|
|
Loading…
Reference in a new issue