Is. #896 - make 'bare' docs match code (#898)

* Is. #896 - make 'bare' docs match code

* Is. #487 #462 add warning message and do not get stuck until EOF

The warning message could perhaps be better worded, and maybe there
should be another message when a '>' is encountered while looking for a ']'
in an MS Word section, and perhaps the section should be discarded...

And perhaps it should be an error, to force the user to fix...

But the fix is good as it is, and these issues can be dealt with
later...

And this fix is piggybacked on this PR, but it is likewise related to
the 'word-2000' option...
This commit is contained in:
Geoff McLane 2020-11-21 16:47:58 +01:00 committed by GitHub
parent 7cda3aba38
commit c6e0ccce1f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 11 additions and 7 deletions

View file

@ -610,7 +610,7 @@ typedef enum
TidyLiteralAttribs, /**< If true attributes may use newlines */
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
TidyLowerLiterals, /**< Folds known attribute values to lower case */
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
TidyMakeBare, /**< Replace smart quotes, em dashes, etc with ASCII */
TidyMakeClean, /**< Replace presentational clutter by style rules */
TidyMark, /**< Add meta element indicating tidied doc */
TidyMergeDivs, /**< Merge multiple DIVs */

View file

@ -1893,8 +1893,7 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
if ( nodeIsHTML(node) )
{
/* check that it's a Word 2000 document */
if ( !TY_(GetAttrByName)(node, "xmlns:o") &&
!cfgBool(doc, TidyMakeBare) )
if ( !TY_(IsWord2000) (doc) ) /* Is. #896 */
return;
/* Output proprietary attributes to maintain errout compatability

View file

@ -786,9 +786,9 @@ static languageDefinition language_en = { whichPluralForm_en, {
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
TidyMakeBare, 0,
"This option specifies if Tidy should strip Microsoft specific HTML "
"from Word 2000 documents, and output spaces rather than non-breaking "
"spaces where they exist in the input. "
"This option specifies if Tidy should replace smart quotes and em dashes with "
"ASCII, and output spaces rather than non-breaking "
"spaces, where they exist in the input. "
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and

View file

@ -2777,6 +2777,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
}
TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING); /* Is. #487 */
/* else swallow characters up to and including next '>' */
while ((c = TY_(ReadChar)(doc->docIn)) != '>')
@ -3340,7 +3341,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
}
}
if (c != ']')
if (c == '>')
{
/* Is. #462 - reached '>' before ']' */
TY_(UngetChar)(c, doc->docIn);
} else if (c != ']')
continue;
/* now look for '>' */