Fixes Mac OS X encoding issues and harmonizes output across platforms.
Previously, Tidy produced different output depending on the compilation target rather than on the file encoding and the specified options: every platform behaved identically except Mac OS. Now, unless the encoding is explicitly set to a Mac encoding such as MacRoman, all encoding assumptions are the same across platforms.
This commit is contained in:
parent
0005841cfe
commit
26e7d9d4b0
|
@ -92,9 +92,6 @@ extern "C" {
|
|||
#define MAC_OS
|
||||
#define FILENAMES_CASE_SENSITIVE 0
|
||||
#define strcasecmp strcmp
|
||||
#ifndef DFLT_REPL_CHARENC
|
||||
#define DFLT_REPL_CHARENC MACROMAN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for BSD like platforms */
|
||||
|
@ -362,13 +359,6 @@ extern "C" {
|
|||
#include <unistd.h> /* needed for unlink on some Unix systems */
|
||||
#endif
|
||||
|
||||
/* This can be set at compile time. Usually Windows,
|
||||
** except for Macintosh builds.
|
||||
*/
|
||||
#ifndef DFLT_REPL_CHARENC
|
||||
#define DFLT_REPL_CHARENC WIN1252
|
||||
#endif
|
||||
|
||||
/* By default, use case-sensitive filename comparison.
|
||||
*/
|
||||
#ifndef FILENAMES_CASE_SENSITIVE
|
||||
|
|
|
@ -1037,10 +1037,8 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode )
|
|||
uint c1 = 0;
|
||||
int replaceMode = DISCARDED_CHAR;
|
||||
|
||||
if ( TY_(ReplacementCharEncoding) == WIN1252 )
|
||||
c1 = TY_(DecodeWin1252)( ch );
|
||||
else if ( TY_(ReplacementCharEncoding) == MACROMAN )
|
||||
c1 = TY_(DecodeMacRoman)( ch );
|
||||
/* Always assume Win1252 in this circumstance. */
|
||||
c1 = TY_(DecodeWin1252)( ch );
|
||||
|
||||
if ( c1 )
|
||||
replaceMode = REPLACED_CHAR;
|
||||
|
|
|
@ -464,10 +464,7 @@ uint TY_(ReadChar)( StreamIn *in )
|
|||
uint c1 = 0, replMode = DISCARDED_CHAR;
|
||||
Bool isVendorChar = ( in->encoding == WIN1252 ||
|
||||
in->encoding == MACROMAN );
|
||||
Bool isWinChar = ( in->encoding == WIN1252 ||
|
||||
TY_(ReplacementCharEncoding) == WIN1252 );
|
||||
Bool isMacChar = ( in->encoding == MACROMAN ||
|
||||
TY_(ReplacementCharEncoding) == MACROMAN );
|
||||
Bool isMacChar = ( in->encoding == MACROMAN );
|
||||
|
||||
/* set error position just before offending character */
|
||||
if (in->doc->lexer)
|
||||
|
@ -476,10 +473,10 @@ uint TY_(ReadChar)( StreamIn *in )
|
|||
in->doc->lexer->columns = in->curcol;
|
||||
}
|
||||
|
||||
if ( isWinChar )
|
||||
c1 = TY_(DecodeWin1252)( c );
|
||||
else if ( isMacChar )
|
||||
c1 = TY_(DecodeMacRoman)( c );
|
||||
if ( isMacChar )
|
||||
c1 = TY_(DecodeMacRoman)( c );
|
||||
else
|
||||
c1 = TY_(DecodeWin1252)( c );
|
||||
if ( c1 )
|
||||
replMode = REPLACED_CHAR;
|
||||
|
||||
|
@ -748,14 +745,7 @@ void TY_(WriteChar)( uint c, StreamOut* out )
|
|||
** Miscellaneous / Helpers
|
||||
****************************/
|
||||
|
||||
/* char encoding used when replacing illegal SGML chars,
|
||||
** regardless of specified encoding. Set at compile time
|
||||
** to either Windows or Mac.
|
||||
*/
|
||||
const int TY_(ReplacementCharEncoding) = DFLT_REPL_CHARENC;
|
||||
|
||||
|
||||
/* Mapping for Windows Western character set CP 1252
|
||||
/* Mapping for Windows Western character set CP 1252
|
||||
** (chars 128-159/U+0080-U+009F) to Unicode.
|
||||
*/
|
||||
static const uint Win2Unicode[32] =
|
||||
|
|
|
@ -181,12 +181,6 @@ int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
|
|||
#endif
|
||||
|
||||
|
||||
/* char encoding used when replacing illegal SGML chars,
|
||||
** regardless of specified encoding. Set at compile time
|
||||
** to either Windows or Mac.
|
||||
*/
|
||||
extern const int TY_(ReplacementCharEncoding);
|
||||
|
||||
/* Function for conversion from Windows-1252 to Unicode */
|
||||
uint TY_(DecodeWin1252)(uint c);
|
||||
|
||||
|
|
Loading…
Reference in a new issue