Merge pull request #340 from htacg/encoding_fixes
Fixes Mac OS X encoding issues and harmonizes output across platforms.
This commit is contained in:
commit
680adfd964
|
@ -92,9 +92,6 @@ extern "C" {
|
||||||
#define MAC_OS
|
#define MAC_OS
|
||||||
#define FILENAMES_CASE_SENSITIVE 0
|
#define FILENAMES_CASE_SENSITIVE 0
|
||||||
#define strcasecmp strcmp
|
#define strcasecmp strcmp
|
||||||
#ifndef DFLT_REPL_CHARENC
|
|
||||||
#define DFLT_REPL_CHARENC MACROMAN
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Convenience defines for BSD like platforms */
|
/* Convenience defines for BSD like platforms */
|
||||||
|
@ -362,13 +359,6 @@ extern "C" {
|
||||||
#include <unistd.h> /* needed for unlink on some Unix systems */
|
#include <unistd.h> /* needed for unlink on some Unix systems */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This can be set at compile time. Usually Windows,
|
|
||||||
** except for Macintosh builds.
|
|
||||||
*/
|
|
||||||
#ifndef DFLT_REPL_CHARENC
|
|
||||||
#define DFLT_REPL_CHARENC WIN1252
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* By default, use case-sensitive filename comparison.
|
/* By default, use case-sensitive filename comparison.
|
||||||
*/
|
*/
|
||||||
#ifndef FILENAMES_CASE_SENSITIVE
|
#ifndef FILENAMES_CASE_SENSITIVE
|
||||||
|
|
|
@ -1037,10 +1037,8 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode )
|
||||||
uint c1 = 0;
|
uint c1 = 0;
|
||||||
int replaceMode = DISCARDED_CHAR;
|
int replaceMode = DISCARDED_CHAR;
|
||||||
|
|
||||||
if ( TY_(ReplacementCharEncoding) == WIN1252 )
|
/* Always assume Win1252 in this circumstance. */
|
||||||
c1 = TY_(DecodeWin1252)( ch );
|
c1 = TY_(DecodeWin1252)( ch );
|
||||||
else if ( TY_(ReplacementCharEncoding) == MACROMAN )
|
|
||||||
c1 = TY_(DecodeMacRoman)( ch );
|
|
||||||
|
|
||||||
if ( c1 )
|
if ( c1 )
|
||||||
replaceMode = REPLACED_CHAR;
|
replaceMode = REPLACED_CHAR;
|
||||||
|
|
|
@ -464,10 +464,7 @@ uint TY_(ReadChar)( StreamIn *in )
|
||||||
uint c1 = 0, replMode = DISCARDED_CHAR;
|
uint c1 = 0, replMode = DISCARDED_CHAR;
|
||||||
Bool isVendorChar = ( in->encoding == WIN1252 ||
|
Bool isVendorChar = ( in->encoding == WIN1252 ||
|
||||||
in->encoding == MACROMAN );
|
in->encoding == MACROMAN );
|
||||||
Bool isWinChar = ( in->encoding == WIN1252 ||
|
Bool isMacChar = ( in->encoding == MACROMAN );
|
||||||
TY_(ReplacementCharEncoding) == WIN1252 );
|
|
||||||
Bool isMacChar = ( in->encoding == MACROMAN ||
|
|
||||||
TY_(ReplacementCharEncoding) == MACROMAN );
|
|
||||||
|
|
||||||
/* set error position just before offending character */
|
/* set error position just before offending character */
|
||||||
if (in->doc->lexer)
|
if (in->doc->lexer)
|
||||||
|
@ -476,10 +473,10 @@ uint TY_(ReadChar)( StreamIn *in )
|
||||||
in->doc->lexer->columns = in->curcol;
|
in->doc->lexer->columns = in->curcol;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( isWinChar )
|
if ( isMacChar )
|
||||||
c1 = TY_(DecodeWin1252)( c );
|
c1 = TY_(DecodeMacRoman)( c );
|
||||||
else if ( isMacChar )
|
else
|
||||||
c1 = TY_(DecodeMacRoman)( c );
|
c1 = TY_(DecodeWin1252)( c );
|
||||||
if ( c1 )
|
if ( c1 )
|
||||||
replMode = REPLACED_CHAR;
|
replMode = REPLACED_CHAR;
|
||||||
|
|
||||||
|
@ -748,14 +745,7 @@ void TY_(WriteChar)( uint c, StreamOut* out )
|
||||||
** Miscellaneous / Helpers
|
** Miscellaneous / Helpers
|
||||||
****************************/
|
****************************/
|
||||||
|
|
||||||
/* char encoding used when replacing illegal SGML chars,
|
/* Mapping for Windows Western character set CP 1252
|
||||||
** regardless of specified encoding. Set at compile time
|
|
||||||
** to either Windows or Mac.
|
|
||||||
*/
|
|
||||||
const int TY_(ReplacementCharEncoding) = DFLT_REPL_CHARENC;
|
|
||||||
|
|
||||||
|
|
||||||
/* Mapping for Windows Western character set CP 1252
|
|
||||||
** (chars 128-159/U+0080-U+009F) to Unicode.
|
** (chars 128-159/U+0080-U+009F) to Unicode.
|
||||||
*/
|
*/
|
||||||
static const uint Win2Unicode[32] =
|
static const uint Win2Unicode[32] =
|
||||||
|
|
|
@ -181,12 +181,6 @@ int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* char encoding used when replacing illegal SGML chars,
|
|
||||||
** regardless of specified encoding. Set at compile time
|
|
||||||
** to either Windows or Mac.
|
|
||||||
*/
|
|
||||||
extern const int TY_(ReplacementCharEncoding);
|
|
||||||
|
|
||||||
/* Function for conversion from Windows-1252 to Unicode */
|
/* Function for conversion from Windows-1252 to Unicode */
|
||||||
uint TY_(DecodeWin1252)(uint c);
|
uint TY_(DecodeWin1252)(uint c);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue