Merge pull request #547 from htacg/issue_352
Attempt to address issue #352.
This commit is contained in:
commit
acaab679c5
|
@ -171,6 +171,7 @@ extern "C" {
|
||||||
FN(ESCAPED_ILLEGAL_URI) \
|
FN(ESCAPED_ILLEGAL_URI) \
|
||||||
FN(FIXED_BACKSLASH) \
|
FN(FIXED_BACKSLASH) \
|
||||||
FN(ID_NAME_MISMATCH) \
|
FN(ID_NAME_MISMATCH) \
|
||||||
|
FN(ILLEGAL_URI_CODEPOINT) \
|
||||||
FN(ILLEGAL_URI_REFERENCE) \
|
FN(ILLEGAL_URI_REFERENCE) \
|
||||||
FN(INSERTING_AUTO_ATTRIBUTE) \
|
FN(INSERTING_AUTO_ATTRIBUTE) \
|
||||||
FN(INVALID_ATTRIBUTE) \
|
FN(INVALID_ATTRIBUTE) \
|
||||||
|
|
71
src/attrs.c
71
src/attrs.c
|
@ -1475,14 +1475,71 @@ static void CheckLowerCaseAttrValue( TidyDocImpl* doc, Node *node, AttVal *attva
|
||||||
}
|
}
|
||||||
|
|
||||||
/* methods for checking value of a specific attribute */
|
/* methods for checking value of a specific attribute */
|
||||||
|
/*
 * ASCII-only character classification used by the URL code point check.
 *
 * These are plain range comparisons rather than <ctype.h> calls because
 * IsURLCodePoint() passes a decoded code point held in a uint: isalnum()
 * has undefined behaviour for an argument that is neither EOF nor
 * representable as unsigned char, and its answer depends on the current
 * locale. Range comparisons are well defined for any integer value and
 * give the same result on every platform.
 *
 * Each macro evaluates its argument more than once, so never pass an
 * expression that has side effects.
 */
#define ISUPPER(a)   (((a) >= 'A') && ((a) <= 'Z'))
#define ISLOWER(a)   (((a) >= 'a') && ((a) <= 'z'))
#define ISNUMERIC(a) (((a) >= '0') && ((a) <= '9'))
#define ISALNUM(a)   (ISUPPER(a) || ISLOWER(a) || ISNUMERIC(a))
|
||||||
|
|
||||||
|
/*
 * Decide whether the character starting at `p` may appear in a URL.
 *
 * p          points at the first byte of the character to examine.
 * increment  receives the distance the caller should advance `p` by to
 *            reach the next character; it is always written.
 *
 * Returns non-zero when the decoded value is ASCII alphanumeric, one of the
 * punctuation marks listed below, or lies inside one of the accepted
 * non-ASCII ranges; returns zero otherwise.
 */
static Bool IsURLCodePoint( ctmbstr p, uint *increment )
{
    /* Inclusive [first, last] ranges of accepted non-ASCII values. */
    static const uint accepted[][2] =
    {
        { 0x00A0,   0xD7FF   },
        { 0xE000,   0xFDCF   },
        { 0xFDF0,   0xFFEF   },
        { 0x10000,  0x1FFFD  },
        { 0x20000,  0x2FFFD  },
        { 0x30000,  0x3FFFD  },
        { 0x40000,  0x4FFFD  },
        { 0x50000,  0x5FFFD  },
        { 0x60000,  0x6FFFD  },
        { 0x70000,  0x7FFFD  },
        { 0x80000,  0x8FFFD  },
        { 0x90000,  0x9FFFD  },
        { 0xA0000,  0xAFFFD  },
        { 0xB0000,  0xBFFFD  },
        { 0xC0000,  0xCFFFD  },
        { 0xD0000,  0xDFFFD  },
        { 0xE0000,  0xEFFFD  },
        { 0xF0000,  0xFFFFD  },
        { 0x100000, 0x10FFFD }
    };
    uint cp;
    uint idx;
    int allowed;

    /* NOTE(review): presumably GetUTF8 returns the count of bytes consumed
       after the first one, so adding 1 yields the full step - confirm
       against its definition. */
    *increment = TY_(GetUTF8)( p, &cp ) + 1;

    allowed = ISALNUM( cp ) ? 1 : 0;

    if ( !allowed )
    {
        switch ( cp )
        {
        case '%':   /* not a valid codepoint, but an escape sequence */
        case '#':   /* not a valid codepoint, but a delimiter */
        case '!':
        case '$':
        case '&':
        case '\'':
        case '(':
        case ')':
        case '*':
        case '+':
        case ',':
        case '-':
        case '.':
        case '/':
        case ':':
        case ';':
        case '=':
        case '?':
        case '@':
        case '_':
        case '~':
            allowed = 1;
            break;
        default:
            break;
        }
    }

    /* Fall back to the non-ASCII ranges; stop at the first one that matches. */
    for ( idx = 0; !allowed && idx < sizeof(accepted) / sizeof(accepted[0]); ++idx )
    {
        allowed = ( cp >= accepted[idx][0] && cp <= accepted[idx][1] );
    }

    return allowed;
}
|
||||||
|
|
||||||
void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval)
|
void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval)
|
||||||
{
|
{
|
||||||
tmbchar c;
|
tmbchar c;
|
||||||
tmbstr dest, p;
|
tmbstr dest, p;
|
||||||
uint escape_count = 0, backslash_count = 0;
|
uint escape_count = 0, backslash_count = 0, bad_codepoint_count = 0;
|
||||||
uint i, pos = 0;
|
uint i, pos = 0;
|
||||||
uint len;
|
uint len;
|
||||||
|
uint increment;
|
||||||
Bool isJavascript = no;
|
Bool isJavascript = no;
|
||||||
|
|
||||||
if (!AttrHasValue(attval))
|
if (!AttrHasValue(attval))
|
||||||
|
@ -1508,6 +1565,14 @@ void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval)
|
||||||
++escape_count;
|
++escape_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while ( *p != 0 )
|
||||||
|
{
|
||||||
|
if ( !IsURLCodePoint( p, &increment ) )
|
||||||
|
++bad_codepoint_count;
|
||||||
|
p = p + increment;
|
||||||
|
}
|
||||||
|
p = attval->value;
|
||||||
|
|
||||||
if ( cfgBool(doc, TidyFixUri) && escape_count )
|
if ( cfgBool(doc, TidyFixUri) && escape_count )
|
||||||
{
|
{
|
||||||
Bool hadnonspace = no;
|
Bool hadnonspace = no;
|
||||||
|
@ -1557,6 +1622,10 @@ void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval)
|
||||||
|
|
||||||
doc->badChars |= BC_INVALID_URI;
|
doc->badChars |= BC_INVALID_URI;
|
||||||
}
|
}
|
||||||
|
if ( bad_codepoint_count )
|
||||||
|
{
|
||||||
|
TY_(ReportAttrError)( doc, node, attval, ILLEGAL_URI_CODEPOINT );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* RFC 2396, section 4.2 states:
|
/* RFC 2396, section 4.2 states:
|
||||||
|
|
|
@ -1786,6 +1786,7 @@ static languageDefinition language_en = { whichPluralForm_en, {
|
||||||
{ ESCAPED_ILLEGAL_URI, 0, "%s escaping malformed URI reference" }, /* ReportAttrError */
|
{ ESCAPED_ILLEGAL_URI, 0, "%s escaping malformed URI reference" }, /* ReportAttrError */
|
||||||
{ FIXED_BACKSLASH, 0, "%s converting backslash in URI to slash" }, /* ReportAttrError */
|
{ FIXED_BACKSLASH, 0, "%s converting backslash in URI to slash" }, /* ReportAttrError */
|
||||||
{ ID_NAME_MISMATCH, 0, "%s id and name attribute value mismatch" }, /* ReportAttrError */
|
{ ID_NAME_MISMATCH, 0, "%s id and name attribute value mismatch" }, /* ReportAttrError */
|
||||||
|
{ ILLEGAL_URI_CODEPOINT, 0, "%s illegal characters found in URI" }, /* ReportAttrError */
|
||||||
{ ILLEGAL_URI_REFERENCE, 0, "%s improperly escaped URI reference" }, /* ReportAttrError */
|
{ ILLEGAL_URI_REFERENCE, 0, "%s improperly escaped URI reference" }, /* ReportAttrError */
|
||||||
{ INSERTING_AUTO_ATTRIBUTE, 0, "%s inserting \"%s\" attribute using value \"%s\"" }, /* ReportAttrError */
|
{ INSERTING_AUTO_ATTRIBUTE, 0, "%s inserting \"%s\" attribute using value \"%s\"" }, /* ReportAttrError */
|
||||||
{ INVALID_ATTRIBUTE, 0, "%s attribute name \"%s\" (value=\"%s\") is invalid" }, /* ReportAttrError */
|
{ INVALID_ATTRIBUTE, 0, "%s attribute name \"%s\" (value=\"%s\") is invalid" }, /* ReportAttrError */
|
||||||
|
|
|
@ -525,6 +525,7 @@ void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
|
||||||
case ID_NAME_MISMATCH:
|
case ID_NAME_MISMATCH:
|
||||||
case BACKSLASH_IN_URI:
|
case BACKSLASH_IN_URI:
|
||||||
case FIXED_BACKSLASH:
|
case FIXED_BACKSLASH:
|
||||||
|
case ILLEGAL_URI_CODEPOINT:
|
||||||
case ILLEGAL_URI_REFERENCE:
|
case ILLEGAL_URI_REFERENCE:
|
||||||
case ESCAPED_ILLEGAL_URI:
|
case ESCAPED_ILLEGAL_URI:
|
||||||
case NEWLINE_IN_URI:
|
case NEWLINE_IN_URI:
|
||||||
|
|
Loading…
Reference in a new issue