diff --git a/src/lexer.c b/src/lexer.c index ef09bd0..6559814 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1519,15 +1519,27 @@ Bool TY_(AddGenerator)( TidyDocImpl* doc ) return no; } -/* examine to identify version */ +/*\ examine <!DOCTYPE ...> to identify version + * Issue #167 and #169 + * If HTML5 + * <!DOCTYPE html>: no PUBLIC identifier (or one without a value) + * and the doctype element compares equal to "html"; nothing else is checked + * else others: VERS_UNKNOWN without a PUBLIC identifier, otherwise GetVersFromFPI() on its value +\*/ static uint FindGivenVersion( TidyDocImpl* doc, Node* doctype ) { AttVal * fpi = TY_(GetAttrByName)(doctype, "PUBLIC"); uint vers; - if (!fpi || !fpi->value) + if (!fpi || !fpi->value) + { + if (doctype->element && (TY_(tmbstrcmp)(doctype->element,"html") == 0)) + { + return VERS_HTML5; /* TODO: do we need to check MORE? */ + } + /* TODO: Consider warning, error message */ return VERS_UNKNOWN; - + } vers = GetVersFromFPI(fpi->value); if (VERS_XHTML & vers) @@ -2693,7 +2705,13 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) /* make a note of the version named by the 1st doctype */ if (lexer->doctype == VERS_UNKNOWN && lexer->token && !cfgBool(doc, TidyXmlTags)) + { lexer->doctype = FindGivenVersion(doc, lexer->token); + if (lexer->doctype != VERS_HTML5) + { + TY_(AdjustTags)(doc); /* Issue #167 & #169 - Adjust TidyTag_A back to legacy mode */ + } + } node = lexer->token; GTDBG(doc,"doctype", node); return node; diff --git a/src/tags.c b/src/tags.c index 8e4e98a..2a98fea 100644 --- a/src/tags.c +++ b/src/tags.c @@ -158,12 +158,18 @@ static CheckAttribs CheckHTML; #define VERS_ELEM_VIDEO (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_WBR (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) -static const Dict tag_defs[] = +/*\ + * Issue #167 & #169 + * Tidy defaults to HTML5 mode + * but allow this table to be ADJUSTED if NOT HTML5 + * was static const Dict tag_defs[] = +\*/ +static Dict tag_defs[] = { { TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL }, /* W3C defined elements */ - { TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], 
(CM_INLINE), TY_(ParseInline), NULL }, + { TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseBlock), NULL }, /* Issue #167 & #169 - default HTML5 */ { TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL }, { TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseBlock), NULL }, @@ -721,6 +727,27 @@ void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType ) } } +/*\ + * Issue #167 & #169 + * Tidy defaults to HTML5 mode + * If the <!DOCTYPE ...> is found to NOT be HTML5, + * then adjust tags to HTML4 mode: the TidyTag_A entry gets ParseInline and CM_INLINE, and the element hash is emptied when ELEMENT_HASH_LOOKUP is set + * At present only TidyTag_A, but could apply to others. NOTE(review): the entry is edited in place and nothing here restores it - confirm a later HTML5 document is not left in legacy mode +\*/ +void TY_(AdjustTags)( TidyDocImpl *doc ) +{ + Dict *np = (Dict *)TY_(LookupTagDef)( TidyTag_A ); /* cast so the looked-up entry can be written through */ + TidyTagImpl* tags = &doc->tags; /* NOTE(review): only referenced under ELEMENT_HASH_LOOKUP below */ + if (np) + { + np->parser = TY_(ParseInline); + np->model = CM_INLINE; +#if ELEMENT_HASH_LOOKUP + tagsEmptyHash( doc, tags ); /* presumably drops cached lookups of the old entry - TODO confirm */ +#endif + } +} + void TY_(FreeTags)( TidyDocImpl* doc ) { TidyTagImpl* tags = &doc->tags; diff --git a/src/tags.h b/src/tags.h index ddbca10..dde330e 100644 --- a/src/tags.h +++ b/src/tags.h @@ -86,6 +86,7 @@ ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType, void TY_(InitTags)( TidyDocImpl* doc ); void TY_(FreeTags)( TidyDocImpl* doc ); +void TY_(AdjustTags)( TidyDocImpl *doc ); /* if NOT HTML5 DOCTYPE, fall back to HTML4 legacy mode */ /* Parser methods for tags */