Fixes Mac OS X encoding issues and harmonizes output across platforms.

Previously, Tidy produced different output depending on the compilation target rather than
on the file encoding and the specified options. Every platform behaved identically except
Mac OS, where the compile-time default used for replacing illegal characters was MacRoman
instead of Windows-1252. Now, unless the encoding is explicitly set to a Mac encoding
(MacRoman), all encoding assumptions are the same across platforms.
This commit is contained in:
Jim Derry 2015-12-31 13:57:34 +08:00
parent 0005841cfe
commit 26e7d9d4b0
4 changed files with 8 additions and 36 deletions

View file

@ -92,9 +92,6 @@ extern "C" {
#define MAC_OS #define MAC_OS
#define FILENAMES_CASE_SENSITIVE 0 #define FILENAMES_CASE_SENSITIVE 0
#define strcasecmp strcmp #define strcasecmp strcmp
#ifndef DFLT_REPL_CHARENC
#define DFLT_REPL_CHARENC MACROMAN
#endif
#endif #endif
/* Convenience defines for BSD like platforms */ /* Convenience defines for BSD like platforms */
@ -362,13 +359,6 @@ extern "C" {
#include <unistd.h> /* needed for unlink on some Unix systems */ #include <unistd.h> /* needed for unlink on some Unix systems */
#endif #endif
/* This can be set at compile time. Usually Windows,
** except for Macintosh builds.
*/
#ifndef DFLT_REPL_CHARENC
#define DFLT_REPL_CHARENC WIN1252
#endif
/* By default, use case-sensitive filename comparison. /* By default, use case-sensitive filename comparison.
*/ */
#ifndef FILENAMES_CASE_SENSITIVE #ifndef FILENAMES_CASE_SENSITIVE

View file

@ -1037,10 +1037,8 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode )
uint c1 = 0; uint c1 = 0;
int replaceMode = DISCARDED_CHAR; int replaceMode = DISCARDED_CHAR;
if ( TY_(ReplacementCharEncoding) == WIN1252 ) /* Always assume Win1252 in this circumstance. */
c1 = TY_(DecodeWin1252)( ch ); c1 = TY_(DecodeWin1252)( ch );
else if ( TY_(ReplacementCharEncoding) == MACROMAN )
c1 = TY_(DecodeMacRoman)( ch );
if ( c1 ) if ( c1 )
replaceMode = REPLACED_CHAR; replaceMode = REPLACED_CHAR;

View file

@ -464,10 +464,7 @@ uint TY_(ReadChar)( StreamIn *in )
uint c1 = 0, replMode = DISCARDED_CHAR; uint c1 = 0, replMode = DISCARDED_CHAR;
Bool isVendorChar = ( in->encoding == WIN1252 || Bool isVendorChar = ( in->encoding == WIN1252 ||
in->encoding == MACROMAN ); in->encoding == MACROMAN );
Bool isWinChar = ( in->encoding == WIN1252 || Bool isMacChar = ( in->encoding == MACROMAN );
TY_(ReplacementCharEncoding) == WIN1252 );
Bool isMacChar = ( in->encoding == MACROMAN ||
TY_(ReplacementCharEncoding) == MACROMAN );
/* set error position just before offending character */ /* set error position just before offending character */
if (in->doc->lexer) if (in->doc->lexer)
@ -476,10 +473,10 @@ uint TY_(ReadChar)( StreamIn *in )
in->doc->lexer->columns = in->curcol; in->doc->lexer->columns = in->curcol;
} }
if ( isWinChar ) if ( isMacChar )
c1 = TY_(DecodeWin1252)( c ); c1 = TY_(DecodeMacRoman)( c );
else if ( isMacChar ) else
c1 = TY_(DecodeMacRoman)( c ); c1 = TY_(DecodeWin1252)( c );
if ( c1 ) if ( c1 )
replMode = REPLACED_CHAR; replMode = REPLACED_CHAR;
@ -748,13 +745,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
** Miscellaneous / Helpers ** Miscellaneous / Helpers
****************************/ ****************************/
/* char encoding used when replacing illegal SGML chars,
** regardless of specified encoding. Set at compile time
** to either Windows or Mac.
*/
const int TY_(ReplacementCharEncoding) = DFLT_REPL_CHARENC;
/* Mapping for Windows Western character set CP 1252 /* Mapping for Windows Western character set CP 1252
** (chars 128-159/U+0080-U+009F) to Unicode. ** (chars 128-159/U+0080-U+009F) to Unicode.
*/ */

View file

@ -181,12 +181,6 @@ int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
#endif #endif
/* char encoding used when replacing illegal SGML chars,
** regardless of specified encoding. Set at compile time
** to either Windows or Mac.
*/
extern const int TY_(ReplacementCharEncoding);
/* Function for conversion from Windows-1252 to Unicode */ /* Function for conversion from Windows-1252 to Unicode */
uint TY_(DecodeWin1252)(uint c); uint TY_(DecodeWin1252)(uint c);