Implement formatter for encoding reports.

This commit is contained in:
Jim Derry 2017-09-04 15:50:45 -04:00
parent 8cb4198724
commit f49c419908
3 changed files with 64 additions and 54 deletions

View file

@ -236,6 +236,7 @@ typedef TidyMessageImpl*(messageFormatter)(TidyDocImpl* doc, Node *element, Node
/* Forward declarations of messageFormatter functions. */ /* Forward declarations of messageFormatter functions. */
static messageFormatter formatAttributeReport; static messageFormatter formatAttributeReport;
static messageFormatter formatEncodingReport;
static messageFormatter formatStandard; static messageFormatter formatStandard;
static messageFormatter formatStandardDynamic; static messageFormatter formatStandardDynamic;
@ -290,10 +291,10 @@ static struct _dispatchTable {
{ INSERTING_AUTO_ATTRIBUTE, TidyWarning, formatAttributeReport }, { INSERTING_AUTO_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ INSERTING_TAG, TidyWarning, formatStandard }, { INSERTING_TAG, TidyWarning, formatStandard },
{ INVALID_ATTRIBUTE, TidyWarning, formatAttributeReport }, { INVALID_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ INVALID_NCR, TidyWarning, NULL }, { INVALID_NCR, TidyWarning, formatEncodingReport },
{ INVALID_SGML_CHARS, TidyWarning, NULL }, { INVALID_SGML_CHARS, TidyWarning, formatEncodingReport },
{ INVALID_UTF8, TidyWarning, NULL }, { INVALID_UTF8, TidyWarning, formatEncodingReport },
{ INVALID_UTF16, TidyWarning, NULL }, { INVALID_UTF16, TidyWarning, formatEncodingReport },
{ INVALID_XML_ID, TidyWarning, formatAttributeReport }, { INVALID_XML_ID, TidyWarning, formatAttributeReport },
{ JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport }, { JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ MALFORMED_COMMENT, TidyWarning, formatStandard }, { MALFORMED_COMMENT, TidyWarning, formatStandard },
@ -346,7 +347,7 @@ static struct _dispatchTable {
{ UNKNOWN_ELEMENT, TidyError, formatStandard }, { UNKNOWN_ELEMENT, TidyError, formatStandard },
{ UNKNOWN_ENTITY, TidyWarning, formatStandard }, { UNKNOWN_ENTITY, TidyWarning, formatStandard },
{ USING_BR_INPLACE_OF, TidyWarning, formatStandard }, { USING_BR_INPLACE_OF, TidyWarning, formatStandard },
{ VENDOR_SPECIFIC_CHARS, TidyWarning, NULL }, { VENDOR_SPECIFIC_CHARS, TidyWarning, formatEncodingReport },
{ WHITE_IN_URI, TidyWarning, formatAttributeReport }, { WHITE_IN_URI, TidyWarning, formatAttributeReport },
{ XML_DECLARATION_DETECTED, TidyWarning, formatStandard }, { XML_DECLARATION_DETECTED, TidyWarning, formatStandard },
{ XML_ID_SYNTAX, TidyWarning, formatAttributeReport }, { XML_ID_SYNTAX, TidyWarning, formatAttributeReport },
@ -441,6 +442,54 @@ TidyMessageImpl *formatAttributeReport(TidyDocImpl* doc, Node *element, Node *no
} }
/* Formatter for Tidy's character-encoding reports. In addition to building
** the message, it records which kind of bad character was seen by setting
** the matching BC_* bit in doc->badChars.
** The variadic arguments are consumed in this order: a uint character value,
** then a Bool that selects the "discarding" action string when true and the
** "replacing" action string otherwise.
** Codes without a case below leave the character text empty and badChars
** untouched. The element and node parameters are not used.
** @todo: Updating badChars probably SHOULD happen in the calling code;
** however these states are captured to generate future output, which may be
** useful here in the long run.
*/
TidyMessageImpl *formatEncodingReport(TidyDocImpl* doc, Node *element, Node *node, uint code, uint level, va_list args)
{
    char charText[ 32 ] = {'\0'};
    uint ch = va_arg( args, uint );
    Bool wasDiscarded = va_arg( args, Bool );
    ctmbstr actionText;

    if ( wasDiscarded )
        actionText = tidyLocalizedString( STRING_DISCARDING );
    else
        actionText = tidyLocalizedString( STRING_REPLACING );

    switch (code)
    {
        /* These three codes take their character text from NtoS. */
        case INVALID_NCR:
            doc->badChars |= BC_INVALID_NCR;
            NtoS( ch, charText );
            break;

        case INVALID_SGML_CHARS:
            doc->badChars |= BC_INVALID_SGML_CHARS;
            NtoS( ch, charText );
            break;

        case VENDOR_SPECIFIC_CHARS:
            doc->badChars |= BC_VENDOR_SPECIFIC_CHARS;
            NtoS( ch, charText );
            break;

        /* The UTF codes are written in "U+XXXX" form instead. */
        case INVALID_UTF8:
            doc->badChars |= BC_INVALID_UTF8;
            TY_(tmbsnprintf)( charText, sizeof(charText), "U+%04X", ch );
            break;

#if SUPPORT_UTF16_ENCODINGS
        case INVALID_UTF16:
            doc->badChars |= BC_INVALID_UTF16;
            TY_(tmbsnprintf)( charText, sizeof(charText), "U+%04X", ch );
            break;
#endif
    }

    return TY_(tidyMessageCreateWithLexer)( doc, code, level, actionText, charText );
}
/* Provides general formatting for the majority of Tidy's reports. Because most /* Provides general formatting for the majority of Tidy's reports. Because most
** reports use the same basic data derived from the element and node, this ** reports use the same basic data derived from the element and node, this
** formatter covers the vast majority of Tidy's report messages. Note that this ** formatter covers the vast majority of Tidy's report messages. Note that this
@ -700,14 +749,14 @@ void TY_(Report)(TidyDocImpl* doc, Node *element, Node *node, uint code, ...)
void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code) void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
{ {
TY_(Report)(doc, NULL, node, code, av); TY_(Report)( doc, NULL, node, code, av );
} }
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option ) void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option )
{ {
assert( option != NULL ); assert( option != NULL );
TY_(Report)(doc, NULL, NULL, STRING_MISSING_MALFORMED, option); TY_(Report)( doc, NULL, NULL, STRING_MISSING_MALFORMED, option );
} }
@ -720,7 +769,13 @@ void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int AR
void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code ) void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code )
{ {
TY_(Report)(doc, NULL, NULL, code, file); TY_(Report)( doc, NULL, NULL, code, file );
}
/* Reports an encoding problem for character `c` under message `code`.
** Forwards to TY_(Report) with no element and no node; `c` and `discarded`
** are passed as the variadic arguments, in that order.
*/
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded)
{
TY_(Report)( doc, NULL, NULL, code, c, discarded );
} }
@ -733,48 +788,6 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code )
*********************************************************************/ *********************************************************************/
/* Reports an encoding problem for character `c` as a TidyWarning.
** Formats `c` into a local buffer, sets the BC_* bit in doc->badChars that
** matches `code`, then creates the message with the lexer and outputs it.
** `discarded` selects the "discarding" action string; otherwise the
** "replacing" string is used. Codes without a case below leave the buffer
** empty and badChars unchanged.
*/
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded)
{
TidyMessageImpl *message = NULL;
char buf[ 32 ] = {'\0'};
ctmbstr action = tidyLocalizedString(discarded ? STRING_DISCARDING : STRING_REPLACING);
/* An encoding mismatch is currently treated as a non-fatal error */
switch (code)
{
/* NCR, SGML and vendor-specific codes take their text from NtoS. */
case INVALID_NCR:
NtoS(c, buf);
doc->badChars |= BC_INVALID_NCR;
break;
case INVALID_SGML_CHARS:
NtoS(c, buf);
doc->badChars |= BC_INVALID_SGML_CHARS;
break;
/* The UTF codes are written in "U+XXXX" form. */
case INVALID_UTF8:
TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c);
doc->badChars |= BC_INVALID_UTF8;
break;
/* The UTF-16 case is compiled only when SUPPORT_UTF16_ENCODINGS is enabled. */
#if SUPPORT_UTF16_ENCODINGS
case INVALID_UTF16:
TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c);
doc->badChars |= BC_INVALID_UTF16;
break;
#endif
case VENDOR_SPECIFIC_CHARS:
NtoS(c, buf);
doc->badChars |= BC_VENDOR_SPECIFIC_CHARS;
break;
}
/* The level is fixed at TidyWarning here rather than supplied by the caller. */
message = TY_(tidyMessageCreateWithLexer)(doc, code, TidyWarning, action, buf );
messageOut( message );
}
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding) void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding)
{ {
TidyMessageImpl *message = NULL; TidyMessageImpl *message = NULL;

View file

@ -59,6 +59,7 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code);
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option ); void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c ); void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c );
void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code ); void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
/** @} */ /** @} */
@ -66,7 +67,6 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
/** @{ */ /** @{ */
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding); void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding);
void TY_(ReportMarkupVersion)( TidyDocImpl* doc ); void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name ); void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name );

View file

@ -620,7 +620,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
TY_(EncodeCharToUTF8Bytes)( c, NULL, &out->sink, &count ); TY_(EncodeCharToUTF8Bytes)( c, NULL, &out->sink, &count );
if (count <= 0) if (count <= 0)
{ {
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF8 | REPLACED_CHAR, c); */
/* replacement char 0xFFFD encoded as UTF-8 */ /* replacement char 0xFFFD encoded as UTF-8 */
PutByte(0xEF, out); PutByte(0xBF, out); PutByte(0xBF, out); PutByte(0xEF, out); PutByte(0xBF, out); PutByte(0xBF, out);
} }
@ -682,7 +681,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
if ( !TY_(IsValidUTF16FromUCS4)(c) ) if ( !TY_(IsValidUTF16FromUCS4)(c) )
{ {
/* invalid UTF-16 value */ /* invalid UTF-16 value */
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */
c = 0; c = 0;
numChars = 0; numChars = 0;
} }
@ -692,7 +690,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
numChars = 2; numChars = 2;
if ( !TY_(SplitSurrogatePair)(c, &theChars[0], &theChars[1]) ) if ( !TY_(SplitSurrogatePair)(c, &theChars[0], &theChars[1]) )
{ {
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */
c = 0; c = 0;
numChars = 0; numChars = 0;
} }