Merge pull request #340 from htacg/encoding_fixes
Fixes Mac OS X encoding issues and harmonizes output across platforms.
This commit is contained in:
commit
680adfd964
|
@ -92,9 +92,6 @@ extern "C" {
|
|||
#define MAC_OS
|
||||
#define FILENAMES_CASE_SENSITIVE 0
|
||||
#define strcasecmp strcmp
|
||||
#ifndef DFLT_REPL_CHARENC
|
||||
#define DFLT_REPL_CHARENC MACROMAN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for BSD like platforms */
|
||||
|
@ -362,13 +359,6 @@ extern "C" {
|
|||
#include <unistd.h> /* needed for unlink on some Unix systems */
|
||||
#endif
|
||||
|
||||
/* This can be set at compile time. Usually Windows,
|
||||
** except for Macintosh builds.
|
||||
*/
|
||||
#ifndef DFLT_REPL_CHARENC
|
||||
#define DFLT_REPL_CHARENC WIN1252
|
||||
#endif
|
||||
|
||||
/* By default, use case-sensitive filename comparison.
|
||||
*/
|
||||
#ifndef FILENAMES_CASE_SENSITIVE
|
||||
|
|
|
@ -1037,10 +1037,8 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode )
|
|||
uint c1 = 0;
|
||||
int replaceMode = DISCARDED_CHAR;
|
||||
|
||||
if ( TY_(ReplacementCharEncoding) == WIN1252 )
|
||||
c1 = TY_(DecodeWin1252)( ch );
|
||||
else if ( TY_(ReplacementCharEncoding) == MACROMAN )
|
||||
c1 = TY_(DecodeMacRoman)( ch );
|
||||
/* Always assume Win1252 in this circumstance. */
|
||||
c1 = TY_(DecodeWin1252)( ch );
|
||||
|
||||
if ( c1 )
|
||||
replaceMode = REPLACED_CHAR;
|
||||
|
|
|
@ -464,10 +464,7 @@ uint TY_(ReadChar)( StreamIn *in )
|
|||
uint c1 = 0, replMode = DISCARDED_CHAR;
|
||||
Bool isVendorChar = ( in->encoding == WIN1252 ||
|
||||
in->encoding == MACROMAN );
|
||||
Bool isWinChar = ( in->encoding == WIN1252 ||
|
||||
TY_(ReplacementCharEncoding) == WIN1252 );
|
||||
Bool isMacChar = ( in->encoding == MACROMAN ||
|
||||
TY_(ReplacementCharEncoding) == MACROMAN );
|
||||
Bool isMacChar = ( in->encoding == MACROMAN );
|
||||
|
||||
/* set error position just before offending character */
|
||||
if (in->doc->lexer)
|
||||
|
@ -476,10 +473,10 @@ uint TY_(ReadChar)( StreamIn *in )
|
|||
in->doc->lexer->columns = in->curcol;
|
||||
}
|
||||
|
||||
if ( isWinChar )
|
||||
c1 = TY_(DecodeWin1252)( c );
|
||||
else if ( isMacChar )
|
||||
c1 = TY_(DecodeMacRoman)( c );
|
||||
if ( isMacChar )
|
||||
c1 = TY_(DecodeMacRoman)( c );
|
||||
else
|
||||
c1 = TY_(DecodeWin1252)( c );
|
||||
if ( c1 )
|
||||
replMode = REPLACED_CHAR;
|
||||
|
||||
|
@ -748,13 +745,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
|
|||
** Miscellaneous / Helpers
|
||||
****************************/
|
||||
|
||||
/* char encoding used when replacing illegal SGML chars,
|
||||
** regardless of specified encoding. Set at compile time
|
||||
** to either Windows or Mac.
|
||||
*/
|
||||
const int TY_(ReplacementCharEncoding) = DFLT_REPL_CHARENC;
|
||||
|
||||
|
||||
/* Mapping for Windows Western character set CP 1252
|
||||
** (chars 128-159/U+0080-U+009F) to Unicode.
|
||||
*/
|
||||
|
|
|
@ -181,12 +181,6 @@ int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
|
|||
#endif
|
||||
|
||||
|
||||
/* char encoding used when replacing illegal SGML chars,
|
||||
** regardless of specified encoding. Set at compile time
|
||||
** to either Windows or Mac.
|
||||
*/
|
||||
extern const int TY_(ReplacementCharEncoding);
|
||||
|
||||
/* Function for conversion from Windows-1252 to Unicode */
|
||||
uint TY_(DecodeWin1252)(uint c);
|
||||
|
||||
|
|
Loading…
Reference in a new issue