diff --git a/include/tidyenum.h b/include/tidyenum.h index 7b35fbb..e34d1ca 100644 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -610,7 +610,7 @@ typedef enum TidyLiteralAttribs, /**< If true attributes may use newlines */ TidyLogicalEmphasis, /**< Replace i by em and b by strong */ TidyLowerLiterals, /**< Folds known attribute values to lower case */ - TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */ + TidyMakeBare, /**< Replace smart quotes, em dashes, etc with ASCII */ TidyMakeClean, /**< Replace presentational clutter by style rules */ TidyMark, /**< Add meta element indicating tidied doc */ TidyMergeDivs, /**< Merge multiple DIVs */ diff --git a/src/clean.c b/src/clean.c index dc6cac1..6602ff9 100644 --- a/src/clean.c +++ b/src/clean.c @@ -1893,8 +1893,7 @@ void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node) if ( nodeIsHTML(node) ) { /* check that it's a Word 2000 document */ - if ( !TY_(GetAttrByName)(node, "xmlns:o") && - !cfgBool(doc, TidyMakeBare) ) + if ( !TY_(IsWord2000) (doc) ) /* Is. #896 */ return; /* Output proprietary attributes to maintain errout compatability diff --git a/src/language_en.h b/src/language_en.h index 8d0eb7a..eab5567 100644 --- a/src/language_en.h +++ b/src/language_en.h @@ -786,9 +786,9 @@ static languageDefinition language_en = { whichPluralForm_en, { - The strings "Tidy" and "HTML Tidy" are the program name and must not be translated. */ TidyMakeBare, 0, - "This option specifies if Tidy should strip Microsoft specific HTML " - "from Word 2000 documents, and output spaces rather than non-breaking " - "spaces where they exist in the input. " + "This option specifies if Tidy should replace smart quotes and em dashes with " + "ASCII, and output spaces rather than non-breaking " + "spaces, where they exist in the input. " }, {/* Important notes for translators: - Use only , , , , and diff --git a/src/lexer.c b/src/lexer.c index ef70e13..49b74f5 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -2777,6 +2777,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) } + TY_(Report)(doc, NULL, NULL, MALFORMED_COMMENT_DROPPING); /* Is. #487 */ /* else swallow characters up to and including next '>' */ while ((c = TY_(ReadChar)(doc->docIn)) != '>') @@ -3340,7 +3341,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) } } - if (c != ']') + if (c == '>') + { + /* Is. #462 - reached '>' before ']' */ + TY_(UngetChar)(c, doc->docIn); + } else if (c != ']') continue; /* now look for '>' */