From f49c4199083b1aca2089b3298c71800081b3c579 Mon Sep 17 00:00:00 2001 From: Jim Derry Date: Mon, 4 Sep 2017 15:50:45 -0400 Subject: [PATCH] Implement formatter for encoding reports. --- src/message.c | 113 +++++++++++++++++++++++++++---------------------- src/message.h | 2 +- src/streamio.c | 3 -- 3 files changed, 64 insertions(+), 54 deletions(-) diff --git a/src/message.c b/src/message.c index 36fcf5c..63e819f 100755 --- a/src/message.c +++ b/src/message.c @@ -236,6 +236,7 @@ typedef TidyMessageImpl*(messageFormatter)(TidyDocImpl* doc, Node *element, Node /* Forward declarations of messageFormatter functions. */ static messageFormatter formatAttributeReport; +static messageFormatter formatEncodingReport; static messageFormatter formatStandard; static messageFormatter formatStandardDynamic; @@ -290,10 +291,10 @@ static struct _dispatchTable { { INSERTING_AUTO_ATTRIBUTE, TidyWarning, formatAttributeReport }, { INSERTING_TAG, TidyWarning, formatStandard }, { INVALID_ATTRIBUTE, TidyWarning, formatAttributeReport }, - { INVALID_NCR, TidyWarning, NULL }, - { INVALID_SGML_CHARS, TidyWarning, NULL }, - { INVALID_UTF8, TidyWarning, NULL }, - { INVALID_UTF16, TidyWarning, NULL }, + { INVALID_NCR, TidyWarning, formatEncodingReport }, + { INVALID_SGML_CHARS, TidyWarning, formatEncodingReport }, + { INVALID_UTF8, TidyWarning, formatEncodingReport }, + { INVALID_UTF16, TidyWarning, formatEncodingReport }, { INVALID_XML_ID, TidyWarning, formatAttributeReport }, { JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport }, { MALFORMED_COMMENT, TidyWarning, formatStandard }, @@ -346,7 +347,7 @@ static struct _dispatchTable { { UNKNOWN_ELEMENT, TidyError, formatStandard }, { UNKNOWN_ENTITY, TidyWarning, formatStandard }, { USING_BR_INPLACE_OF, TidyWarning, formatStandard }, - { VENDOR_SPECIFIC_CHARS, TidyWarning, NULL }, + { VENDOR_SPECIFIC_CHARS, TidyWarning, formatEncodingReport }, { WHITE_IN_URI, TidyWarning, formatAttributeReport }, { XML_DECLARATION_DETECTED, TidyWarning, formatStandard }, { XML_ID_SYNTAX, TidyWarning, formatAttributeReport }, @@ -441,6 +442,54 @@ TidyMessageImpl *formatAttributeReport(TidyDocImpl* doc, Node *element, Node *no } +/* Provides report formatting *and* additional status settings for Tidy's +** encoding reports. +** @todo: These status changes probably SHOULD be made in the calling code; +** however these states are captured to generate future output, which may be +** useful here in the long run. +*/ +TidyMessageImpl *formatEncodingReport(TidyDocImpl* doc, Node *element, Node *node, uint code, uint level, va_list args) +{ + char buf[ 32 ] = {'\0'}; + uint c = va_arg( args, uint ); + Bool discarded = va_arg( args, Bool ); + ctmbstr action = tidyLocalizedString(discarded ? STRING_DISCARDING : STRING_REPLACING); + + switch (code) + { + case INVALID_NCR: + NtoS(c, buf); + doc->badChars |= BC_INVALID_NCR; + break; + + case INVALID_SGML_CHARS: + NtoS(c, buf); + doc->badChars |= BC_INVALID_SGML_CHARS; + break; + + case INVALID_UTF8: + TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c); + doc->badChars |= BC_INVALID_UTF8; + break; + +#if SUPPORT_UTF16_ENCODINGS + case INVALID_UTF16: + TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c); + doc->badChars |= BC_INVALID_UTF16; + break; +#endif + + case VENDOR_SPECIFIC_CHARS: + NtoS(c, buf); + doc->badChars |= BC_VENDOR_SPECIFIC_CHARS; + break; + } + + return TY_(tidyMessageCreateWithLexer)(doc, code, level, action, buf ); + +} + + /* Provides general formatting for the majority of Tidy's reports. Because most ** reports use the same basic data derived from the element and node, this ** formatter covers the vast majority of Tidy's report messages. Note that this @@ -700,14 +749,14 @@ void TY_(Report)(TidyDocImpl* doc, Node *element, Node *node, uint code, ...) void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code) { - TY_(Report)(doc, NULL, node, code, av); + TY_(Report)( doc, NULL, node, code, av ); } void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option ) { assert( option != NULL ); - TY_(Report)(doc, NULL, NULL, STRING_MISSING_MALFORMED, option); + TY_(Report)( doc, NULL, NULL, STRING_MISSING_MALFORMED, option ); } @@ -720,7 +769,13 @@ void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int AR void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code ) { - TY_(Report)(doc, NULL, NULL, code, file); + TY_(Report)( doc, NULL, NULL, code, file ); +} + + +void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded) +{ + TY_(Report)( doc, NULL, NULL, code, c, discarded ); } @@ -733,48 +788,6 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code ) *********************************************************************/ -void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded) -{ - TidyMessageImpl *message = NULL; - char buf[ 32 ] = {'\0'}; - ctmbstr action = tidyLocalizedString(discarded ? STRING_DISCARDING : STRING_REPLACING); - - /* An encoding mismatch is currently treated as a non-fatal error */ - switch (code) - { - case INVALID_NCR: - NtoS(c, buf); - doc->badChars |= BC_INVALID_NCR; - break; - - case INVALID_SGML_CHARS: - NtoS(c, buf); - doc->badChars |= BC_INVALID_SGML_CHARS; - break; - - case INVALID_UTF8: - TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c); - doc->badChars |= BC_INVALID_UTF8; - break; - -#if SUPPORT_UTF16_ENCODINGS - case INVALID_UTF16: - TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c); - doc->badChars |= BC_INVALID_UTF16; - break; -#endif - - case VENDOR_SPECIFIC_CHARS: - NtoS(c, buf); - doc->badChars |= BC_VENDOR_SPECIFIC_CHARS; - break; - } - - message = TY_(tidyMessageCreateWithLexer)(doc, code, TidyWarning, action, buf ); - messageOut( message ); -} - - void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding) { TidyMessageImpl *message = NULL; diff --git a/src/message.h b/src/message.h index c2b6184..69902b3 100644 --- a/src/message.h +++ b/src/message.h @@ -59,6 +59,7 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code); void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option ); void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c ); void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code ); +void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded); /** @} */ @@ -66,7 +67,6 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code ); /** @{ */ -void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded); void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding); void TY_(ReportMarkupVersion)( TidyDocImpl* doc ); void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name ); diff --git a/src/streamio.c b/src/streamio.c index 866d9d6..2fd3ae5 100644 --- a/src/streamio.c +++ b/src/streamio.c @@ -620,7 +620,6 @@ void TY_(WriteChar)( uint c, StreamOut* out ) TY_(EncodeCharToUTF8Bytes)( c, NULL, &out->sink, &count ); if (count <= 0) { - /* TY_(ReportEncodingError)(in->lexer, INVALID_UTF8 | REPLACED_CHAR, c); */ /* replacement char 0xFFFD encoded as UTF-8 */ PutByte(0xEF, out); PutByte(0xBF, out); PutByte(0xBF, out); } @@ -682,7 +681,6 @@ void TY_(WriteChar)( uint c, StreamOut* out ) if ( !TY_(IsValidUTF16FromUCS4)(c) ) { /* invalid UTF-16 value */ - /* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */ c = 0; numChars = 0; } @@ -692,7 +690,6 @@ void TY_(WriteChar)( uint c, StreamOut* out ) numChars = 2; if ( !TY_(SplitSurrogatePair)(c, &theChars[0], &theChars[1]) ) { - /* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */ c = 0; numChars = 0; }