* Is. #896 - make 'bare' docs match code
* Is. #487 #462 - add warning message and do not get stuck until EOF

The warning message could perhaps be better worded, and maybe there should be another message when a '>' is encountered while looking for a ']' in an MS Word section, and perhaps the section should be discarded... And perhaps it should be an error, to force the user to fix it... But the fix is good as it is, and these issues can be dealt with later... This fix is piggybacked on this PR, but it is likewise related to the 'word-2000' option...
This commit is contained in:
parent
7cda3aba38
commit
c6e0ccce1f
|
@ -610,7 +610,7 @@ typedef enum
|
|||
TidyLiteralAttribs, /**< If true attributes may use newlines */
|
||||
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
|
||||
TidyLowerLiterals, /**< Folds known attribute values to lower case */
|
||||
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
|
||||
TidyMakeBare, /**< Replace smart quotes, em dashes, etc with ASCII */
|
||||
TidyMakeClean, /**< Replace presentational clutter by style rules */
|
||||
TidyMark, /**< Add meta element indicating tidied doc */
|
||||
TidyMergeDivs, /**< Merge multiple DIVs */
|
||||
|
|
|
@ -1893,8 +1893,7 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
|
|||
if ( nodeIsHTML(node) )
|
||||
{
|
||||
/* check that it's a Word 2000 document */
|
||||
if ( !TY_(GetAttrByName)(node, "xmlns:o") &&
|
||||
!cfgBool(doc, TidyMakeBare) )
|
||||
if ( !TY_(IsWord2000) (doc) ) /* Is. #896 */
|
||||
return;
|
||||
|
||||
/* Output proprietary attributes to maintain errout compatibility
|
||||
|
|
|
@ -786,9 +786,9 @@ static languageDefinition language_en = { whichPluralForm_en, {
|
|||
- The strings "Tidy" and "HTML Tidy" are the program name and must not
|
||||
be translated. */
|
||||
TidyMakeBare, 0,
|
||||
"This option specifies if Tidy should strip Microsoft specific HTML "
|
||||
"from Word 2000 documents, and output spaces rather than non-breaking "
|
||||
"spaces where they exist in the input. "
|
||||
"This option specifies if Tidy should replace smart quotes and em dashes with "
|
||||
"ASCII, and output spaces rather than non-breaking "
|
||||
"spaces, where they exist in the input. "
|
||||
},
|
||||
{/* Important notes for translators:
|
||||
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
|
||||
|
|
|
@ -2777,6 +2777,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
}
|
||||
|
||||
|
||||
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING); /* Is. #487 */
|
||||
|
||||
/* else swallow characters up to and including next '>' */
|
||||
while ((c = TY_(ReadChar)(doc->docIn)) != '>')
|
||||
|
@ -3340,7 +3341,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
|
|||
}
|
||||
}
|
||||
|
||||
if (c != ']')
|
||||
if (c == '>')
|
||||
{
|
||||
/* Is. #462 - reached '>' before ']' */
|
||||
TY_(UngetChar)(c, doc->docIn);
|
||||
} else if (c != ']')
|
||||
continue;
|
||||
|
||||
/* now look for '>' */
|
||||
|
|
Loading…
Reference in a new issue