HTML IDs can contain anything except whitespace.

Introduced TY_(IsHTMLSpace)(uint c), which checks to see if c is one of the
chars that the HTML spec (and browsers) treat as a space in attribute
values: 0x020 (space), 0x009 (tab), 0x00a (LF), 0x00c (FF), or 0x00d (CR).
Can't use ANSI C isspace(int c) here because, like the equivalent standard
functions in many other languages, it also treats 0x00b (VT) as a space.
This commit is contained in:
Michael[tm] Smith 2012-01-02 16:12:51 +09:00
parent e84a6d272e
commit 264c9bc043
3 changed files with 7 additions and 4 deletions

View file

@ -1367,11 +1367,8 @@ Bool TY_(IsValidHTMLID)(ctmbstr id)
if (!s)
return no;
if (!TY_(IsLetter)(*s++))
return no;
while (*s)
if (!TY_(IsNamechar)(*s++))
if (TY_(IsHTMLSpace)(*s++))
return no;
return yes;

View file

@ -230,6 +230,11 @@ Bool TY_(IsLetter)(uint c)
return (map & letter)!=0;
}
/* Reports whether c is one of the five characters treated as a space
** in HTML attribute values: TAB (0x09), LF (0x0a), FF (0x0c),
** CR (0x0d) or SPACE (0x20).  Vertical tab (0x0b) is deliberately
** not accepted, which is why isspace() is not used here.
*/
Bool TY_(IsHTMLSpace)(uint c)
{
    /* TAB..CR is a contiguous run; carve VT (0x0b) out of it. */
    const int inControlRun = (c >= 0x009 && c <= 0x00d && c != 0x00b);

    return inControlRun || c == 0x020;
}
Bool TY_(IsNamechar)(uint c)
{
uint map = MAP(c);

View file

@ -416,6 +416,7 @@ void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
Bool TY_(IsWhite)(uint c);
Bool TY_(IsDigit)(uint c);
Bool TY_(IsLetter)(uint c);
/* True only for 0x20 (space), 0x09 (tab), 0x0a (LF), 0x0c (FF) and
** 0x0d (CR); unlike isspace(), 0x0b (VT) is not accepted.
*/
Bool TY_(IsHTMLSpace)(uint c);
Bool TY_(IsNewline)(uint c);
Bool TY_(IsNamechar)(uint c);
Bool TY_(IsXMLLetter)(uint c);