Merge pull request #340 from htacg/encoding_fixes

Fixes Mac OS X encoding issues and harmonizes output across platforms.
This commit is contained in:
Jim Derry 2016-01-07 11:51:46 +08:00
commit 680adfd964
4 changed files with 8 additions and 36 deletions

View file

@@ -92,9 +92,6 @@ extern "C" {
#define MAC_OS #define MAC_OS
#define FILENAMES_CASE_SENSITIVE 0 #define FILENAMES_CASE_SENSITIVE 0
#define strcasecmp strcmp #define strcasecmp strcmp
#ifndef DFLT_REPL_CHARENC
#define DFLT_REPL_CHARENC MACROMAN
#endif
#endif #endif
/* Convenience defines for BSD like platforms */ /* Convenience defines for BSD like platforms */
@@ -362,13 +359,6 @@ extern "C" {
#include <unistd.h> /* needed for unlink on some Unix systems */ #include <unistd.h> /* needed for unlink on some Unix systems */
#endif #endif
/* This can be set at compile time. Usually Windows,
** except for Macintosh builds.
*/
#ifndef DFLT_REPL_CHARENC
#define DFLT_REPL_CHARENC WIN1252
#endif
/* By default, use case-sensitive filename comparison. /* By default, use case-sensitive filename comparison.
*/ */
#ifndef FILENAMES_CASE_SENSITIVE #ifndef FILENAMES_CASE_SENSITIVE

View file

@@ -1037,10 +1037,8 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode )
uint c1 = 0; uint c1 = 0;
int replaceMode = DISCARDED_CHAR; int replaceMode = DISCARDED_CHAR;
if ( TY_(ReplacementCharEncoding) == WIN1252 ) /* Always assume Win1252 in this circumstance. */
c1 = TY_(DecodeWin1252)( ch ); c1 = TY_(DecodeWin1252)( ch );
else if ( TY_(ReplacementCharEncoding) == MACROMAN )
c1 = TY_(DecodeMacRoman)( ch );
if ( c1 ) if ( c1 )
replaceMode = REPLACED_CHAR; replaceMode = REPLACED_CHAR;

View file

@@ -464,10 +464,7 @@ uint TY_(ReadChar)( StreamIn *in )
uint c1 = 0, replMode = DISCARDED_CHAR; uint c1 = 0, replMode = DISCARDED_CHAR;
Bool isVendorChar = ( in->encoding == WIN1252 || Bool isVendorChar = ( in->encoding == WIN1252 ||
in->encoding == MACROMAN ); in->encoding == MACROMAN );
Bool isWinChar = ( in->encoding == WIN1252 || Bool isMacChar = ( in->encoding == MACROMAN );
TY_(ReplacementCharEncoding) == WIN1252 );
Bool isMacChar = ( in->encoding == MACROMAN ||
TY_(ReplacementCharEncoding) == MACROMAN );
/* set error position just before offending character */ /* set error position just before offending character */
if (in->doc->lexer) if (in->doc->lexer)
@@ -476,10 +473,10 @@ uint TY_(ReadChar)( StreamIn *in )
in->doc->lexer->columns = in->curcol; in->doc->lexer->columns = in->curcol;
} }
if ( isWinChar ) if ( isMacChar )
c1 = TY_(DecodeWin1252)( c ); c1 = TY_(DecodeMacRoman)( c );
else if ( isMacChar ) else
c1 = TY_(DecodeMacRoman)( c ); c1 = TY_(DecodeWin1252)( c );
if ( c1 ) if ( c1 )
replMode = REPLACED_CHAR; replMode = REPLACED_CHAR;
@@ -748,14 +745,7 @@ void TY_(WriteChar)( uint c, StreamOut* out )
** Miscellaneous / Helpers ** Miscellaneous / Helpers
****************************/ ****************************/
/* char encoding used when replacing illegal SGML chars, /* Mapping for Windows Western character set CP 1252
** regardless of specified encoding. Set at compile time
** to either Windows or Mac.
*/
const int TY_(ReplacementCharEncoding) = DFLT_REPL_CHARENC;
/* Mapping for Windows Western character set CP 1252
** (chars 128-159/U+0080-U+009F) to Unicode. ** (chars 128-159/U+0080-U+009F) to Unicode.
*/ */
static const uint Win2Unicode[32] = static const uint Win2Unicode[32] =

View file

@@ -181,12 +181,6 @@ int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
#endif #endif
/* char encoding used when replacing illegal SGML chars,
** regardless of specified encoding. Set at compile time
** to either Windows or Mac.
*/
extern const int TY_(ReplacementCharEncoding);
/* Function for conversion from Windows-1252 to Unicode */ /* Function for conversion from Windows-1252 to Unicode */
uint TY_(DecodeWin1252)(uint c); uint TY_(DecodeWin1252)(uint c);