From 0dc68d6cb1afbd68c27102484eaf7d40e2d581b2 Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Fri, 6 Mar 2015 12:49:30 +0100 Subject: [PATCH] Issue #167 & #169 - default to HTML5 mode. Revert TidyTag_A to HTML5 mode, but allow the table to be modified if the DOCTYPE given is found to NOT be HTML5, through a service TY_(AdjustTags). Care is taken to clear any previous hash cached tags. At present this only affects the anchor tag, but could be applied to others that need to change their parsing due to an identified DOCTYPE. --- src/lexer.c | 24 +++++++++++++++++++++--- src/tags.c | 31 +++++++++++++++++++++++++++++-- src/tags.h | 1 + 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index ef09bd0..6559814 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1519,15 +1519,27 @@ Bool TY_(AddGenerator)( TidyDocImpl* doc ) return no; } -/* examine <!DOCTYPE> to identify version */ +/*\ examine <!DOCTYPE ...> to identify version + * Issue #167 and #169 + * If HTML5 + * <!DOCTYPE html> + * <!DOCTYPE html SYSTEM "about:legacy-compat"> + * else others +\*/ static uint FindGivenVersion( TidyDocImpl* doc, Node* doctype ) { AttVal * fpi = TY_(GetAttrByName)(doctype, "PUBLIC"); uint vers; - if (!fpi || !fpi->value) + if (!fpi || !fpi->value) + { + if (doctype->element && (TY_(tmbstrcmp)(doctype->element,"html") == 0)) + { + return VERS_HTML5; /* TODO: do we need to check MORE? 
*/ + } + /* TODO: Consider warning, error message */ return VERS_UNKNOWN; - + } vers = GetVersFromFPI(fpi->value); if (VERS_XHTML & vers) @@ -2693,7 +2705,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) /* make a note of the version named by the 1st doctype */ if (lexer->doctype == VERS_UNKNOWN && lexer->token && !cfgBool(doc, TidyXmlTags)) + { lexer->doctype = FindGivenVersion(doc, lexer->token); + if (lexer->doctype != VERS_HTML5) + { + TY_(AdjustTags)(doc); /* Issue #167 & #169 - Adjust TidyTag_A back to legacy mode */ + } + } node = lexer->token; GTDBG(doc,"doctype", node); return node; diff --git a/src/tags.c b/src/tags.c index 8e4e98a..2a98fea 100644 --- a/src/tags.c +++ b/src/tags.c @@ -158,12 +158,18 @@ static CheckAttribs CheckHTML; #define VERS_ELEM_VIDEO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_WBR (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) -static const Dict tag_defs[] = +/*\ + * Issue #167 & #169 + * Tidy defaults to HTML5 mode + * but allow this table to be ADJUSTED if NOT HTML5 + * was static const Dict tag_defs[] = +\*/ +static Dict tag_defs[] = { { TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL }, /* W3C defined elements */ - { TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE), TY_(ParseInline), NULL }, + { TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseBlock), NULL }, /* Issue #167 & #169 - default HTML5 */ { TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, @@ -721,6 +727,27 @@ void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType ) } } +/*\ + * Issue #167 & 
#169 + * Tidy defaults to HTML5 mode + * If the <!DOCTYPE ...> is found to NOT be HTML5, + * then adjust tags to HTML4 mode + * At present only TidyTag_A, but could apply to others +\*/ +void TY_(AdjustTags)( TidyDocImpl *doc ) +{ + Dict *np = (Dict *)TY_(LookupTagDef)( TidyTag_A ); + TidyTagImpl* tags = &doc->tags; + if (np) + { + np->parser = TY_(ParseInline); + np->model = CM_INLINE; +#if ELEMENT_HASH_LOOKUP + tagsEmptyHash( doc, tags ); +#endif + } +} + void TY_(FreeTags)( TidyDocImpl* doc ) { TidyTagImpl* tags = &doc->tags; diff --git a/src/tags.h b/src/tags.h index ddbca10..dde330e 100644 --- a/src/tags.h +++ b/src/tags.h @@ -86,6 +86,7 @@ ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType, void TY_(InitTags)( TidyDocImpl* doc ); void TY_(FreeTags)( TidyDocImpl* doc ); +void TY_(AdjustTags)( TidyDocImpl *doc ); /* if NOT HTML5 DOCTYPE, fall back to HTML4 legacy mode */ /* Parser methods for tags */