Issue #167 & #169 - default to HTML5 mode.

Revert TidyTag_A to HTML5 mode, but allow the table to be modified if the
DOCTYPE given is found to NOT be HTML5, through a service TY_(AdjustTags).
Care is taken to clear any previously cached tag hash entries.

At present this only affects the anchor tag, but could be applied to
others that need to change their parsing due to an identified DOCTYPE.
This commit is contained in:
Geoff McLane 2015-03-06 12:49:30 +01:00
parent 50e8d7fe69
commit 0dc68d6cb1
3 changed files with 51 additions and 5 deletions

View file

@ -1519,15 +1519,27 @@ Bool TY_(AddGenerator)( TidyDocImpl* doc )
return no;
}
/* examine <!DOCTYPE> to identify version */
/*\ examine <!DOCTYPE ...> to identify version
* Issue #167 and #169
* If HTML5
* <!DOCTYPE html>
* <!DOCTYPE html SYSTEM "about:legacy-compat">
* else others
\*/
static uint FindGivenVersion( TidyDocImpl* doc, Node* doctype )
{
AttVal * fpi = TY_(GetAttrByName)(doctype, "PUBLIC");
uint vers;
if (!fpi || !fpi->value)
if (!fpi || !fpi->value)
{
if (doctype->element && (TY_(tmbstrcmp)(doctype->element,"html") == 0))
{
return VERS_HTML5; /* TODO: do we need to check MORE? */
}
/* TODO: Consider warning, error message */
return VERS_UNKNOWN;
}
vers = GetVersFromFPI(fpi->value);
if (VERS_XHTML & vers)
@ -2693,7 +2705,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
/* make a note of the version named by the 1st doctype */
if (lexer->doctype == VERS_UNKNOWN && lexer->token && !cfgBool(doc, TidyXmlTags))
{
lexer->doctype = FindGivenVersion(doc, lexer->token);
if (lexer->doctype != VERS_HTML5)
{
TY_(AdjustTags)(doc); /* Issue #167 & #169 - Adjust TidyTag_A back to legacy mode */
}
}
node = lexer->token;
GTDBG(doc,"doctype", node);
return node;

View file

@ -158,12 +158,18 @@ static CheckAttribs CheckHTML;
#define VERS_ELEM_VIDEO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
#define VERS_ELEM_WBR (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50)
static const Dict tag_defs[] =
/*\
* Issue #167 & #169
* Tidy defaults to HTML5 mode
* but allow this table to be ADJUSTED if NOT HTML5
* was static const Dict tag_defs[] =
\*/
static Dict tag_defs[] =
{
{ TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL },
/* W3C defined elements */
{ TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseBlock), NULL }, /* Issue #167 & #169 - default HTML5 */
{ TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
@ -721,6 +727,27 @@ void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType )
}
}
/*\
 *  Issue #167 & #169
 *  Tidy parses in HTML5 mode by default, where <a> is set up with the
 *  block parser. When the <!DOCTYPE ...> turns out NOT to be HTML5,
 *  switch the TidyTag_A definition back to inline parsing (HTML4 mode).
 *  Only TidyTag_A is adjusted at present; other tags could be handled
 *  the same way.
 *
 *  doc - document whose hashed tag lookups are emptied after the change
 *        (only when ELEMENT_HASH_LOOKUP is enabled).
 *
 *  NOTE(review): the Dict returned by TY_(LookupTagDef) is modified in
 *  place; presumably that is the entry in the file-scope tag_defs table,
 *  in which case the change outlives this document - confirm.
\*/
void TY_(AdjustTags)( TidyDocImpl *doc )
{
    Dict *anchor = (Dict *)TY_(LookupTagDef)( TidyTag_A );

    if (anchor == NULL)
    {
        return;
    }

    /* legacy (non-HTML5) anchor: inline content model, inline parser */
    anchor->model = CM_INLINE;
    anchor->parser = TY_(ParseInline);
#if ELEMENT_HASH_LOOKUP
    /* discard hashed lookups made before the definition was changed */
    tagsEmptyHash( doc, &doc->tags );
#endif
}
void TY_(FreeTags)( TidyDocImpl* doc )
{
TidyTagImpl* tags = &doc->tags;

View file

@ -86,6 +86,7 @@ ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType,
void TY_(InitTags)( TidyDocImpl* doc );
void TY_(FreeTags)( TidyDocImpl* doc );
void TY_(AdjustTags)( TidyDocImpl *doc ); /* if NOT HTML5 DOCTYPE, fall back to HTML4 legacy mode */
/* Parser methods for tags */