From 7f73d4f429cfe68a0c7384c7f9e577e13500d7fe Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Sat, 11 Feb 2017 18:33:45 +0100 Subject: [PATCH] Issue #483 - Add ReportSurrogateError() service and connect. --- src/language_en.h | 6 +++--- src/lexer.c | 12 ++++-------- src/message.c | 7 +++++++ src/message.h | 1 + 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/language_en.h b/src/language_en.h index b048c73..ab191e0 100644 --- a/src/language_en.h +++ b/src/language_en.h @@ -336,9 +336,9 @@ static languageDefinition language_en = { whichPluralForm_en, { { INVALID_UTF8, 0, "%s invalid UTF-8 bytes (char. code %s)" }, /* Error */ { INVALID_UTF16, 0, "%s invalid UTF-16 surrogate pair (char. code %s)" }, /* Error */ { INVALID_NCR, 0, "%s invalid numeric character reference %s" }, /* Error */ - { BAD_SURROGATE_PAIR, 0, "Have out-of-range surrogate pair U+%s:U+%s, replaced with 2 U+FFFD values."}, /* warning */ - { BAD_SURROGATE_TAIL, 0, "Leading (High) surrogate pair U+%s, with no trailing (Low) entity, replaced with U+FFFD." }, /* warning */ - { BAD_SURROGATE_LEAD, 0, "Trailing (Low) surrogate pair U+%s, with no leading (High) entity, replaced with U+FFFD." }, /* warning */ + { BAD_SURROGATE_PAIR, 0, "Have out-of-range surrogate pair U+%04X:U+%04X, replaced with U+FFFD value."}, /* warning */ + { BAD_SURROGATE_TAIL, 0, "Leading (High) surrogate pair U+%04X, with no trailing (Low) entity, replaced with U+FFFD." }, /* warning */ + { BAD_SURROGATE_LEAD, 0, "Trailing (Low) surrogate pair U+%04X, with no leading (High) entity, replaced with U+FFFD." }, /* warning */ /* ReportEntityError */ { MISSING_SEMICOLON, 0, "entity \"%s\" doesn't end in ';'" }, /* Warning in HTML, Error in XML/XHTML */ diff --git a/src/lexer.c b/src/lexer.c index e3fa267..210c9cf 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1121,8 +1121,7 @@ static SPStatus GetSurrogatePair(TidyDocImpl* doc, Bool isXml, uint *pch) { status = SP_failed; /* is one of the 32 out-of-range pairs */ *pch = 0xFFFD; /* return substitute character */ - /* SP WARNING: - BAD_SURROGATE_PAIR */ - fprintf(stderr, "Warning: Have out-of-range surrogate pair U+%04X:U+%04X, replaced with U+FFFD value.\n", fch, ch); + TY_(ReportSurrogateError)(doc, BAD_SURROGATE_PAIR, fch, ch); /* SP WARNING: - */ } } } @@ -1291,8 +1290,7 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode ) status = GetSurrogatePair(doc, isXml, &ch); if (status == SP_error) { - /* SP WARNING: BAD_SURROGATE_TAIL - use substitute character */ - fprintf(stderr, "Warning: Leading(High) surrogate pair U+%04X, with no trailing(Low) entity, replaced with U+FFFD.\n", c1); + TY_(ReportSurrogateError)(doc, BAD_SURROGATE_TAIL, c1, 0); /* SP WARNING: - using substitute character */ TY_(UngetChar)('&', doc->docIn); /* otherwise put it back */ } } @@ -1301,16 +1299,14 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode ) /* put this non-entity lead char back */ TY_(UngetChar)(c1, doc->docIn); /* Have leading surrogate pair, with no tail */ - /* SP WARNING: BAD_SURROGATE_TAIL - use substitute character */ - fprintf(stderr, "Warning: Leading(High) surrogate pair U+%04X, with no trailing(Low) entity, replaced with U+FFFD.\n", ch); + TY_(ReportSurrogateError)(doc, BAD_SURROGATE_TAIL, ch, 0); /* SP WARNING: - using substitute character */ ch = 0xFFFD; } } else if (!preserveEntities && found && TY_(IsHighSurrogate)(ch)) { /* Have trailing surrogate pair, with no lead */ - /* SP WARNING: - BAD_SURROGATE_LEAD - - use substitute character */ - fprintf(stderr, "Warning: Trailing (Low) surrogate pair U+%04X, with no leading (High) entity, replaced with U+FFFD.\n", ch); + TY_(ReportSurrogateError)(doc, BAD_SURROGATE_LEAD, ch, 0); /* SP WARNING: - using substitute character */ ch = 0xFFFD; } diff --git a/src/message.c b/src/message.c index 2948deb..31bbb42 100755 --- a/src/message.c +++ b/src/message.c @@ -530,6 +530,13 @@ void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, messageLexer( doc, TidyWarning, code, fmt, entityname ); } +void TY_(ReportSurrogateError)(TidyDocImpl* doc, uint code, uint c1, uint c2) +{ + ctmbstr fmt = tidyLocalizedString(code); + if (fmt) + messageLexer(doc, TidyWarning, code, fmt, c1, c2); +} + void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code) { char const *name = "NULL", *value = "NULL"; diff --git a/src/message.h b/src/message.h index a1fc129..affc5af 100644 --- a/src/message.h +++ b/src/message.h @@ -46,6 +46,7 @@ void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarde void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c ); void TY_(ReportAttrError)( TidyDocImpl* doc, Node* node, AttVal* av, uint code ); void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name ); +void TY_(ReportSurrogateError)(TidyDocImpl* doc, uint code, uint c1, uint c2); #if SUPPORT_ACCESSIBILITY_CHECKS