diff --git a/include/tidyenum.h b/include/tidyenum.h
index 7b35fbb..e34d1ca 100644
--- a/include/tidyenum.h
+++ b/include/tidyenum.h
@@ -610,7 +610,7 @@ typedef enum
TidyLiteralAttribs, /**< If true attributes may use newlines */
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
TidyLowerLiterals, /**< Folds known attribute values to lower case */
- TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
+ TidyMakeBare, /**< Replace smart quotes, em dashes, etc with ASCII */
TidyMakeClean, /**< Replace presentational clutter by style rules */
TidyMark, /**< Add meta element indicating tidied doc */
TidyMergeDivs, /**< Merge multiple DIVs */
diff --git a/src/clean.c b/src/clean.c
index dc6cac1..6602ff9 100644
--- a/src/clean.c
+++ b/src/clean.c
@@ -1893,8 +1893,7 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node)
if ( nodeIsHTML(node) )
{
/* check that it's a Word 2000 document */
- if ( !TY_(GetAttrByName)(node, "xmlns:o") &&
- !cfgBool(doc, TidyMakeBare) )
+ if ( !TY_(IsWord2000) (doc) ) /* Is. #896 */
return;
/* Output proprietary attributes to maintain errout compatability
diff --git a/src/language_en.h b/src/language_en.h
index 8d0eb7a..eab5567 100644
--- a/src/language_en.h
+++ b/src/language_en.h
@@ -786,9 +786,9 @@ static languageDefinition language_en = { whichPluralForm_en, {
- The strings "Tidy" and "HTML Tidy" are the program name and must not
be translated. */
TidyMakeBare, 0,
- "This option specifies if Tidy should strip Microsoft specific HTML "
- "from Word 2000 documents, and output spaces rather than non-breaking "
- "spaces where they exist in the input. "
+ "This option specifies if Tidy should replace smart quotes and em dashes with "
+ "ASCII, and output spaces rather than non-breaking "
+ "spaces, where they exist in the input. "
},
{/* Important notes for translators:
- Use only <code></code>, <var></var>, <em></em>, <strong></strong>, and
diff --git a/src/lexer.c b/src/lexer.c
index ef70e13..49b74f5 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -2777,6 +2777,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
}
+ TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING); /* Is. #487 */
/* else swallow characters up to and including next '>' */
while ((c = TY_(ReadChar)(doc->docIn)) != '>')
@@ -3340,7 +3341,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
}
}
- if (c != ']')
+ if (c == '>')
+ {
+ /* Is. #462 - reached '>' before ']' */
+ TY_(UngetChar)(c, doc->docIn);
+ } else if (c != ']')
continue;
/* now look for '>' */