Implement formatter for encoding reports.
This commit is contained in:
parent
8cb4198724
commit
f49c419908
107
src/message.c
107
src/message.c
|
@ -236,6 +236,7 @@ typedef TidyMessageImpl*(messageFormatter)(TidyDocImpl* doc, Node *element, Node
|
||||||
|
|
||||||
/* Forward declarations of messageFormatter functions. */
|
/* Forward declarations of messageFormatter functions. */
|
||||||
static messageFormatter formatAttributeReport;
|
static messageFormatter formatAttributeReport;
|
||||||
|
static messageFormatter formatEncodingReport;
|
||||||
static messageFormatter formatStandard;
|
static messageFormatter formatStandard;
|
||||||
static messageFormatter formatStandardDynamic;
|
static messageFormatter formatStandardDynamic;
|
||||||
|
|
||||||
|
@ -290,10 +291,10 @@ static struct _dispatchTable {
|
||||||
{ INSERTING_AUTO_ATTRIBUTE, TidyWarning, formatAttributeReport },
|
{ INSERTING_AUTO_ATTRIBUTE, TidyWarning, formatAttributeReport },
|
||||||
{ INSERTING_TAG, TidyWarning, formatStandard },
|
{ INSERTING_TAG, TidyWarning, formatStandard },
|
||||||
{ INVALID_ATTRIBUTE, TidyWarning, formatAttributeReport },
|
{ INVALID_ATTRIBUTE, TidyWarning, formatAttributeReport },
|
||||||
{ INVALID_NCR, TidyWarning, NULL },
|
{ INVALID_NCR, TidyWarning, formatEncodingReport },
|
||||||
{ INVALID_SGML_CHARS, TidyWarning, NULL },
|
{ INVALID_SGML_CHARS, TidyWarning, formatEncodingReport },
|
||||||
{ INVALID_UTF8, TidyWarning, NULL },
|
{ INVALID_UTF8, TidyWarning, formatEncodingReport },
|
||||||
{ INVALID_UTF16, TidyWarning, NULL },
|
{ INVALID_UTF16, TidyWarning, formatEncodingReport },
|
||||||
{ INVALID_XML_ID, TidyWarning, formatAttributeReport },
|
{ INVALID_XML_ID, TidyWarning, formatAttributeReport },
|
||||||
{ JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport },
|
{ JOINING_ATTRIBUTE, TidyWarning, formatAttributeReport },
|
||||||
{ MALFORMED_COMMENT, TidyWarning, formatStandard },
|
{ MALFORMED_COMMENT, TidyWarning, formatStandard },
|
||||||
|
@ -346,7 +347,7 @@ static struct _dispatchTable {
|
||||||
{ UNKNOWN_ELEMENT, TidyError, formatStandard },
|
{ UNKNOWN_ELEMENT, TidyError, formatStandard },
|
||||||
{ UNKNOWN_ENTITY, TidyWarning, formatStandard },
|
{ UNKNOWN_ENTITY, TidyWarning, formatStandard },
|
||||||
{ USING_BR_INPLACE_OF, TidyWarning, formatStandard },
|
{ USING_BR_INPLACE_OF, TidyWarning, formatStandard },
|
||||||
{ VENDOR_SPECIFIC_CHARS, TidyWarning, NULL },
|
{ VENDOR_SPECIFIC_CHARS, TidyWarning, formatEncodingReport },
|
||||||
{ WHITE_IN_URI, TidyWarning, formatAttributeReport },
|
{ WHITE_IN_URI, TidyWarning, formatAttributeReport },
|
||||||
{ XML_DECLARATION_DETECTED, TidyWarning, formatStandard },
|
{ XML_DECLARATION_DETECTED, TidyWarning, formatStandard },
|
||||||
{ XML_ID_SYNTAX, TidyWarning, formatAttributeReport },
|
{ XML_ID_SYNTAX, TidyWarning, formatAttributeReport },
|
||||||
|
@ -441,6 +442,54 @@ TidyMessageImpl *formatAttributeReport(TidyDocImpl* doc, Node *element, Node *no
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Provides report formatting *and* additional status settings for Tidy's
|
||||||
|
** encoding reports.
|
||||||
|
** @todo: These status changes probably SHOULD be made in the calling code;
|
||||||
|
** however these states are captured to generate future output, which may be
|
||||||
|
** useful here in the long run.
|
||||||
|
*/
|
||||||
|
TidyMessageImpl *formatEncodingReport(TidyDocImpl* doc, Node *element, Node *node, uint code, uint level, va_list args)
|
||||||
|
{
|
||||||
|
char buf[ 32 ] = {'\0'};
|
||||||
|
uint c = va_arg( args, uint );
|
||||||
|
Bool discarded = va_arg( args, Bool );
|
||||||
|
ctmbstr action = tidyLocalizedString(discarded ? STRING_DISCARDING : STRING_REPLACING);
|
||||||
|
|
||||||
|
switch (code)
|
||||||
|
{
|
||||||
|
case INVALID_NCR:
|
||||||
|
NtoS(c, buf);
|
||||||
|
doc->badChars |= BC_INVALID_NCR;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case INVALID_SGML_CHARS:
|
||||||
|
NtoS(c, buf);
|
||||||
|
doc->badChars |= BC_INVALID_SGML_CHARS;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case INVALID_UTF8:
|
||||||
|
TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c);
|
||||||
|
doc->badChars |= BC_INVALID_UTF8;
|
||||||
|
break;
|
||||||
|
|
||||||
|
#if SUPPORT_UTF16_ENCODINGS
|
||||||
|
case INVALID_UTF16:
|
||||||
|
TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c);
|
||||||
|
doc->badChars |= BC_INVALID_UTF16;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case VENDOR_SPECIFIC_CHARS:
|
||||||
|
NtoS(c, buf);
|
||||||
|
doc->badChars |= BC_VENDOR_SPECIFIC_CHARS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TY_(tidyMessageCreateWithLexer)(doc, code, level, action, buf );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Provides general formatting for the majority of Tidy's reports. Because most
|
/* Provides general formatting for the majority of Tidy's reports. Because most
|
||||||
** reports use the same basic data derived from the element and node, this
|
** reports use the same basic data derived from the element and node, this
|
||||||
** formatter covers the vast majority of Tidy's report messages. Note that this
|
** formatter covers the vast majority of Tidy's report messages. Note that this
|
||||||
|
@ -724,6 +773,12 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Reports an encoding problem identified by `code` for the character `c`.
** This is a thin wrapper: it forwards to TY_(Report), passing NULL for that
** function's second and third arguments and handing `c` and `discarded`
** over as the trailing arguments.
** NOTE(review): presumably TY_(Report) looks `code` up in the dispatch table
** and the trailing arguments are then read by formatEncodingReport in the
** same order (character first, discarded flag second) -- confirm.
*/
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded)
|
||||||
|
{
|
||||||
|
TY_(Report)( doc, NULL, NULL, code, c, discarded );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*********************************************************************
|
/*********************************************************************
|
||||||
* Legacy High Level Message Writing Functions - Specific
|
* Legacy High Level Message Writing Functions - Specific
|
||||||
* When adding new reports to LibTidy, preference should be given
|
* When adding new reports to LibTidy, preference should be given
|
||||||
|
@ -733,48 +788,6 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code )
|
||||||
*********************************************************************/
|
*********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
/* Emits a warning about a character that could not be handled in the current
** encoding. `c` is the character value and `discarded` selects between the
** localized "discarding" and "replacing" wording. The matching BC_* bit is
** also set in doc->badChars. The message is always created at TidyWarning
** level and sent out through messageOut().
*/
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded)
{
    char charText[ 32 ] = {'\0'};
    TidyMessageImpl *report = NULL;
    ctmbstr verb;

    /* Localized word describing what happened to the character. */
    if ( discarded )
        verb = tidyLocalizedString( STRING_DISCARDING );
    else
        verb = tidyLocalizedString( STRING_REPLACING );

    /* An encoding mismatch is currently treated as a non-fatal error, so
    ** the document is only flagged and a warning is produced. Codes not
    ** listed here leave charText empty and badChars untouched. */
    switch (code)
    {
        case INVALID_NCR:
            doc->badChars |= BC_INVALID_NCR;
            NtoS(c, charText);
            break;

        case INVALID_SGML_CHARS:
            doc->badChars |= BC_INVALID_SGML_CHARS;
            NtoS(c, charText);
            break;

        case INVALID_UTF8:
            doc->badChars |= BC_INVALID_UTF8;
            TY_(tmbsnprintf)(charText, sizeof(charText), "U+%04X", c);
            break;

#if SUPPORT_UTF16_ENCODINGS
        case INVALID_UTF16:
            doc->badChars |= BC_INVALID_UTF16;
            TY_(tmbsnprintf)(charText, sizeof(charText), "U+%04X", c);
            break;
#endif

        case VENDOR_SPECIFIC_CHARS:
            doc->badChars |= BC_VENDOR_SPECIFIC_CHARS;
            NtoS(c, charText);
            break;
    }

    report = TY_(tidyMessageCreateWithLexer)(doc, code, TidyWarning, verb, charText );
    messageOut( report );
}
|
|
||||||
|
|
||||||
|
|
||||||
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding)
|
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding)
|
||||||
{
|
{
|
||||||
TidyMessageImpl *message = NULL;
|
TidyMessageImpl *message = NULL;
|
||||||
|
|
|
@ -59,6 +59,7 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code);
|
||||||
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
|
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
|
||||||
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c );
|
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c );
|
||||||
void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
|
void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
|
||||||
|
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
|
||||||
|
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
@ -66,7 +67,6 @@ void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
|
||||||
/** @{ */
|
/** @{ */
|
||||||
|
|
||||||
|
|
||||||
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
|
|
||||||
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding);
|
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding);
|
||||||
void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
|
void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
|
||||||
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name );
|
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name );
|
||||||
|
|
|
@ -620,7 +620,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
|
||||||
TY_(EncodeCharToUTF8Bytes)( c, NULL, &out->sink, &count );
|
TY_(EncodeCharToUTF8Bytes)( c, NULL, &out->sink, &count );
|
||||||
if (count <= 0)
|
if (count <= 0)
|
||||||
{
|
{
|
||||||
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF8 | REPLACED_CHAR, c); */
|
|
||||||
/* replacement char 0xFFFD encoded as UTF-8 */
|
/* replacement char 0xFFFD encoded as UTF-8 */
|
||||||
PutByte(0xEF, out); PutByte(0xBF, out); PutByte(0xBF, out);
|
PutByte(0xEF, out); PutByte(0xBF, out); PutByte(0xBF, out);
|
||||||
}
|
}
|
||||||
|
@ -682,7 +681,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
|
||||||
if ( !TY_(IsValidUTF16FromUCS4)(c) )
|
if ( !TY_(IsValidUTF16FromUCS4)(c) )
|
||||||
{
|
{
|
||||||
/* invalid UTF-16 value */
|
/* invalid UTF-16 value */
|
||||||
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */
|
|
||||||
c = 0;
|
c = 0;
|
||||||
numChars = 0;
|
numChars = 0;
|
||||||
}
|
}
|
||||||
|
@ -692,7 +690,6 @@ void TY_(WriteChar)( uint c, StreamOut* out )
|
||||||
numChars = 2;
|
numChars = 2;
|
||||||
if ( !TY_(SplitSurrogatePair)(c, &theChars[0], &theChars[1]) )
|
if ( !TY_(SplitSurrogatePair)(c, &theChars[0], &theChars[1]) )
|
||||||
{
|
{
|
||||||
/* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */
|
|
||||||
c = 0;
|
c = 0;
|
||||||
numChars = 0;
|
numChars = 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue