Implement formatter for encoding reports.

This commit is contained in:
Jim Derry 2017-09-04 15:50:45 -04:00
parent 8cb4198724
commit f49c419908
3 changed files with 64 additions and 54 deletions

View file

@ -236,6 +236,7 @@ typedef TidyMessageImpl*(messageFormatter)(TidyDocImpl* doc, Node *element, Node
/* Forward declarations of messageFormatter functions. */
static messageFormatter formatAttributeReport;
static messageFormatter formatEncodingReport;
static messageFormatter formatStandard;
static messageFormatter formatStandardDynamic;
@ -290,10 +291,10 @@ static struct _dispatchTable {
{ INSERTING_AUTO_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ INSERTING_TAG, TidyWarning, formatStandard },
{ INVALID_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ INVALID_NCR, TidyWarning, NULL },
{ INVALID_SGML_CHARS, TidyWarning, NULL },
{ INVALID_UTF8, TidyWarning, NULL },
{ INVALID_UTF16, TidyWarning, NULL },
{ INVALID_NCR, TidyWarning, formatEncodingReport },
{ INVALID_SGML_CHARS, TidyWarning, formatEncodingReport },
{ INVALID_UTF8, TidyWarning, formatEncodingReport },
{ INVALID_UTF16, TidyWarning, formatEncodingReport },
{ INVALID_XML_ID, TidyWarning, formatAttributeReport },
{ JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport },
{ MALFORMED_COMMENT, TidyWarning, formatStandard },
@ -346,7 +347,7 @@ static struct _dispatchTable {
{ UNKNOWN_ELEMENT, TidyError, formatStandard },
{ UNKNOWN_ENTITY, TidyWarning, formatStandard },
{ USING_BR_INPLACE_OF, TidyWarning, formatStandard },
{ VENDOR_SPECIFIC_CHARS, TidyWarning, NULL },
{ VENDOR_SPECIFIC_CHARS, TidyWarning, formatEncodingReport },
{ WHITE_IN_URI, TidyWarning, formatAttributeReport },
{ XML_DECLARATION_DETECTED, TidyWarning, formatStandard },
{ XML_ID_SYNTAX, TidyWarning, formatAttributeReport },
@ -441,6 +442,54 @@ TidyMessageImpl *formatAttributeReport(TidyDocImpl* doc, Node *element, Node *no
}
/* Provides report formatting *and* additional status settings for Tidy's
** encoding reports.
**
** Variadic arguments consumed from `args`, in this order:
**   1. uint - the offending character value;
**   2. the "discarded" flag - non-zero selects STRING_DISCARDING, zero
**      selects STRING_REPLACING as the action text.
**
** Besides building the message, the BC_* flag matching `code` is OR-ed into
** doc->badChars. `element` and `node` belong to the messageFormatter
** signature but are not used by this formatter.
**
** Returns the message built by TY_(tidyMessageCreateWithLexer) from `code`,
** `level`, the action text and the formatted character value.
**
** @todo: These status changes probably SHOULD be made in the calling code;
** however these states are captured to generate future output, which may be
** useful here in the long run.
*/
TidyMessageImpl *formatEncodingReport(TidyDocImpl* doc, Node *element, Node *node, uint code, uint level, va_list args)
{
    char buf[ 32 ] = {'\0'};
    uint c = va_arg( args, uint );
    /* The flag is passed through `...`, so it arrives after the default
    ** argument promotions. Reading it back as `int` is valid whether Bool is
    ** narrower than int or has the same width (signed or unsigned, since the
    ** value is 0 or 1); reading it back as `Bool` is only valid when Bool
    ** happens to match the promoted type.
    ** NOTE(review): Bool's definition is not visible here - confirm it is
    ** no wider than int. */
    int discarded = va_arg( args, int );
    ctmbstr action = tidyLocalizedString(discarded ? STRING_DISCARDING : STRING_REPLACING);

    switch (code)
    {
        case INVALID_NCR:
            NtoS(c, buf);
            doc->badChars |= BC_INVALID_NCR;
            break;

        case INVALID_SGML_CHARS:
            NtoS(c, buf);
            doc->badChars |= BC_INVALID_SGML_CHARS;
            break;

        case INVALID_UTF8:
            TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c);
            doc->badChars |= BC_INVALID_UTF8;
            break;

#if SUPPORT_UTF16_ENCODINGS
        case INVALID_UTF16:
            TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c);
            doc->badChars |= BC_INVALID_UTF16;
            break;
#endif

        case VENDOR_SPECIFIC_CHARS:
            NtoS(c, buf);
            doc->badChars |= BC_VENDOR_SPECIFIC_CHARS;
            break;

        default:
            /* Any other code (including INVALID_UTF16 when
            ** SUPPORT_UTF16_ENCODINGS is off): no status flag is set and
            ** buf stays empty, exactly as before. */
            break;
    }

    return TY_(tidyMessageCreateWithLexer)(doc, code, level, action, buf );
}
/* Provides general formatting for the majority of Tidy's reports. Because most
** reports use the same basic data derived from the element and node, this
** formatter covers the vast majority of Tidy's report messages. Note that this
@ -700,14 +749,14 @@ void TY_(Report)(TidyDocImpl* doc, Node *element, Node *node, uint code, ...)
/* Reports an attribute-related message: forwards doc, node and code to
** TY_(Report) with no element (NULL), passing the attribute value `av` as
** the single variadic argument.
**
** NOTE(review): the two TY_(Report) calls below differ only in spacing. This
** looks like the before/after pair of a whitespace-only change shown without
** diff markers, not an intentional double report - confirm that only one
** call exists in the real file.
*/
void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
{
TY_(Report)(doc, NULL, node, code, av);
TY_(Report)( doc, NULL, node, code, av );
}
/* Reports STRING_MISSING_MALFORMED for the given option name: forwards to
** TY_(Report) with no element and no node, passing `option` as the single
** variadic argument. `option` must not be NULL (enforced by assert only).
**
** NOTE(review): the two TY_(Report) calls below differ only in spacing. This
** looks like the before/after pair of a whitespace-only change shown without
** diff markers - confirm that only one call exists in the real file.
*/
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option )
{
assert( option != NULL );
TY_(Report)(doc, NULL, NULL, STRING_MISSING_MALFORMED, option);
TY_(Report)( doc, NULL, NULL, STRING_MISSING_MALFORMED, option );
}
@ -720,7 +769,13 @@ void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int AR
/* Reports the message `code` about a file: forwards to TY_(Report) with no
** element and no node, passing `file` as the single variadic argument.
**
** NOTE(review): the two TY_(Report) calls below differ only in spacing. This
** looks like the before/after pair of a whitespace-only change shown without
** diff markers - confirm that only one call exists in the real file.
*/
void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code )
{
TY_(Report)(doc, NULL, NULL, code, file);
TY_(Report)( doc, NULL, NULL, code, file );
}
/* Reports the encoding message `code` for character value `c`.
** Forwards to TY_(Report) with no element and no node. Two variadic
** arguments follow `code`: the character value first, then the
** `discarded` flag.
*/
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded)
{
    /* Named copies make the order of the variadic arguments explicit. */
    const uint badChar = c;
    const Bool wasDiscarded = discarded;

    TY_(Report)( doc, NULL, NULL, code, badChar, wasDiscarded );
}
@ -733,48 +788,6 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code )
*********************************************************************/
/* Self-contained encoding report: formats character value `c`, records the
** matching BC_* flag in doc->badChars, then builds a TidyWarning message and
** sends it through messageOut.
**
** `discarded` selects the action text: STRING_DISCARDING when non-zero,
** STRING_REPLACING otherwise. For a `code` not handled below, no flag is set
** and the character text stays empty.
*/
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded)
{
    char text[ 32 ] = {'\0'};
    ctmbstr verb;

    if ( discarded )
        verb = tidyLocalizedString(STRING_DISCARDING);
    else
        verb = tidyLocalizedString(STRING_REPLACING);

    /* An encoding mismatch is currently treated as a non-fatal error */
    if ( code == INVALID_NCR )
    {
        NtoS(c, text);
        doc->badChars |= BC_INVALID_NCR;
    }
    else if ( code == INVALID_SGML_CHARS )
    {
        NtoS(c, text);
        doc->badChars |= BC_INVALID_SGML_CHARS;
    }
    else if ( code == INVALID_UTF8 )
    {
        TY_(tmbsnprintf)(text, sizeof(text), "U+%04X", c);
        doc->badChars |= BC_INVALID_UTF8;
    }
#if SUPPORT_UTF16_ENCODINGS
    else if ( code == INVALID_UTF16 )
    {
        TY_(tmbsnprintf)(text, sizeof(text), "U+%04X", c);
        doc->badChars |= BC_INVALID_UTF16;
    }
#endif
    else if ( code == VENDOR_SPECIFIC_CHARS )
    {
        NtoS(c, text);
        doc->badChars |= BC_VENDOR_SPECIFIC_CHARS;
    }

    messageOut( TY_(tidyMessageCreateWithLexer)(doc, code, TidyWarning, verb, text ) );
}
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding)
{
TidyMessageImpl *message = NULL;

View file

@ -59,6 +59,7 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code);
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c );
void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
/** @} */
@ -66,7 +67,6 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
/** @{ */
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding);
void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name );

View file

@ -620,7 +620,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
TY_(EncodeCharToUTF8Bytes)( c, NULL, &out->sink, &count );
if (count <= 0)
{
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF8 | REPLACED_CHAR, c); */
/* replacement char 0xFFFD encoded as UTF-8 */
PutByte(0xEF, out); PutByte(0xBF, out); PutByte(0xBF, out);
}
@ -682,7 +681,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
if ( !TY_(IsValidUTF16FromUCS4)(c) )
{
/* invalid UTF-16 value */
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */
c = 0;
numChars = 0;
}
@ -692,7 +690,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
numChars = 2;
if ( !TY_(SplitSurrogatePair)(c, &theChars[0], &theChars[1]) )
{
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */
c = 0;
numChars = 0;
}