From 1be5ccbb63ee8e599d022da3a90b6d93fd31e40f Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Thu, 5 Feb 2015 12:21:08 +0100 Subject: [PATCH] Issue #130 - initial MathML support --- include/tidyenum.h | 1 + src/attrdict.c | 26 ++++++++++++++++++++++++++ src/attrdict.h | 1 + src/lexer.c | 9 ++++++--- src/parser.c | 10 ++++++++-- src/tags.c | 4 +++- 6 files changed, 45 insertions(+), 6 deletions(-) diff --git a/include/tidyenum.h b/include/tidyenum.h index f0696c5..3fd0d07 100644 --- a/include/tidyenum.h +++ b/include/tidyenum.h @@ -384,6 +384,7 @@ typedef enum TidyTag_LINK, /**< LINK */ TidyTag_LISTING, /**< LISTING */ TidyTag_MAP, /**< MAP */ + TidyTag_MATHML, /**< MATH (HTML5) [i_a]2 root element of MathML embedded in [X]HTML */ TidyTag_MARQUEE, /**< MARQUEE */ TidyTag_MENU, /**< MENU */ TidyTag_META, /**< META */ diff --git a/src/attrdict.c b/src/attrdict.c index ddddaca..7019757 100644 --- a/src/attrdict.c +++ b/src/attrdict.c @@ -7748,6 +7748,32 @@ const AttrVersion TY_(W3CAttrsFor_MAP)[] = { TidyAttr_UNKNOWN, 0 }, }; +const AttrVersion TY_(W3CAttrsFor_MATHML)[] = /* [i_a]2: attribute table for the "math" element; each entry pairs an attribute id with the version flags it is listed for, and the list ends with the TidyAttr_UNKNOWN entry */ +{ + { TidyAttr_ALIGN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_CLASS, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_DIR, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_ID, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_HEIGHT, xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50 }, + { TidyAttr_LANG, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|xxxx|xxxx|HT50|XH50 }, + { TidyAttr_OnCLICK, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnDBLCLICK, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnKEYDOWN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnKEYPRESS, 
xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnKEYUP, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnMOUSEDOWN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnMOUSEMOVE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnMOUSEOUT, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnMOUSEOVER, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_OnMOUSEUP, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_STYLE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_TITLE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_WIDTH, xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50 }, + { TidyAttr_XML_LANG, xxxx|xxxx|xxxx|xxxx|X10T|xxxx|xxxx|X10F|xxxx|xxxx|X10S|XH11|xxxx|HT50|XH50 }, + { TidyAttr_XMLNS, xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx|HT50|XH50 }, + { TidyAttr_UNKNOWN, 0 }, +}; + const AttrVersion TY_(W3CAttrsFor_MARK)[] = { { TidyAttr_ACCESSKEY, xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50 }, diff --git a/src/attrdict.h b/src/attrdict.h index 1855fa6..1827443 100644 --- a/src/attrdict.h +++ b/src/attrdict.h @@ -72,6 +72,7 @@ extern const AttrVersion TY_(W3CAttrsFor_LI)[]; extern const AttrVersion TY_(W3CAttrsFor_LINK)[]; extern const AttrVersion TY_(W3CAttrsFor_LISTING)[]; extern const AttrVersion TY_(W3CAttrsFor_MAP)[]; +extern const AttrVersion TY_(W3CAttrsFor_MATHML)[]; /* [i_a]2: attribute table for the "math" element, defined in attrdict.c */ extern const AttrVersion TY_(W3CAttrsFor_MENU)[]; extern const AttrVersion TY_(W3CAttrsFor_META)[]; extern const AttrVersion TY_(W3CAttrsFor_NEXTID)[]; diff --git a/src/lexer.c b/src/lexer.c index e0c326a..4fc1484 100644 --- 
a/src/lexer.c +++ b/src/lexer.c @@ -2525,7 +2525,7 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) c = ParseTagName( doc ); isempty = no; attributes = NULL; - lexer->token = TagToken( doc, (isempty ? StartEndTag : StartTag) ); + lexer->token = TagToken( doc, StartTag ); /* [i_a]2: 'isempty' was set to 'no' two lines above, so the old conditional always produced StartTag */ /* parse attributes, consuming closing ">" */ if (c != '>') @@ -2561,8 +2561,11 @@ static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode ) lexer->waswhite = no; lexer->state = LEX_CONTENT; - if (lexer->token->tag == NULL) - TY_(ReportFatal)( doc, NULL, lexer->token, UNKNOWN_ELEMENT ); + if (lexer->token->tag == NULL) + { + if (mode != OtherNamespace) /* [i_a]2: report UNKNOWN_ELEMENT for a tag without a definition only when NOT lexing in 'OtherNamespace' mode; presumably unknown tags are expected inside foreign markup such as MathML - confirm */ + TY_(ReportFatal)( doc, NULL, lexer->token, UNKNOWN_ELEMENT ); + } else if ( !cfgBool(doc, TidyXmlTags) ) { Node* curr = lexer->token; diff --git a/src/parser.c b/src/parser.c index 6011046..32dfaaa 100644 --- a/src/parser.c +++ b/src/parser.c @@ -734,6 +734,10 @@ static Bool InsertMisc(Node *element, Node *node) static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ) { Lexer* lexer = doc->lexer; + + if (node->tag == NULL) /* [i_a]2: without a tag definition there is no parser to dispatch to, and node->tag is dereferenced further down; return early to prevent a crash (seen with active content docs: php, asp) */ + return; + /* Fix by GLP 2000-12-21. Need to reset insertspace if this is both a non-inline and empty tag (base, link, meta, isindex, hr, area). 
@@ -753,6 +757,8 @@ static void ParseTag( TidyDocImpl* doc, Node *node, GetTokenMode mode ) if (node->type == StartEndTag) return; + lexer->parent = node; /* [i_a]2 CHECKME: records this node in lexer->parent just before dispatching to its parser; why this is required is not documented - NOTE(review): confirm which consumer reads lexer->parent */ + (*node->tag->parser)( doc, node, mode ); } @@ -3908,7 +3914,7 @@ void TY_(ParseBody)(TidyDocImpl* doc, Node *body, GetTokenMode mode) if (TY_(nodeIsElement)(node)) { - if ( TY_(nodeHasCM)(node, CM_INLINE) ) + if ( TY_(nodeHasCM)(node, CM_INLINE) && !TY_(nodeHasCM)(node, CM_MIXED) ) /* [i_a]2: elements that are also flagged CM_MIXED (in tag_defs: "math" and "noscript") no longer take this inline-only branch */ { /* HTML4 strict doesn't allow inline content here */ /* but HTML2 does allow img elements as children of body */ @@ -4483,7 +4489,7 @@ static void AttributeChecks(TidyDocImpl* doc, Node* node) if (TY_(nodeIsElement)(node)) { - if (node->tag->chkattrs) + if (node->tag && node->tag->chkattrs) /* [i_a]2: node->tag can be NULL for an element with no tag definition (crash seen after adding SVG support with alt/unknown tag subtree insertion); such nodes fall through to the generic CheckAttributes call */ node->tag->chkattrs(doc, node); else TY_(CheckAttributes)(doc, node); diff --git a/src/tags.c b/src/tags.c index e6b8e0d..0b5a669 100644 --- a/src/tags.c +++ b/src/tags.c @@ -77,6 +77,7 @@ static CheckAttribs CheckHTML; #define VERS_ELEM_LINK (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50) #define VERS_ELEM_LISTING (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx) #define VERS_ELEM_MAP (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50) +#define VERS_ELEM_MATHML (xxxx|xxxx|xxxx|H41T|X10T|xxxx|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50) /* [i_a]2: version flags used by the "math" entry in tag_defs */ #define VERS_ELEM_MENU (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50) #define VERS_ELEM_META (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50) #define VERS_ELEM_NEXTID (HT20|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx) @@ -218,7 +219,8 @@ static const Dict tag_defs[] = { TidyTag_LINK, "link", VERS_ELEM_LINK, &TY_(W3CAttrsFor_LINK)[0], (CM_HEAD|CM_BLOCK|CM_EMPTY), 
TY_(ParseEmpty), CheckLINK }, { TidyTag_LISTING, "listing", VERS_ELEM_LISTING, &TY_(W3CAttrsFor_LISTING)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL }, { TidyTag_MAP, "map", VERS_ELEM_MAP, &TY_(W3CAttrsFor_MAP)[0], (CM_INLINE), TY_(ParseBlock), NULL }, - // { TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL }, + { TidyTag_MATHML, "math", VERS_ELEM_MATHML, &TY_(W3CAttrsFor_MATHML)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseNamespace),NULL }, /* [i_a]2: "math" is flagged inline, block and mixed; its content is handed to ParseNamespace and it has no element-specific attribute checker */ + /* { TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL }, */ { TidyTag_META, "meta", VERS_ELEM_META, &TY_(W3CAttrsFor_META)[0], (CM_HEAD|CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL }, { TidyTag_NOFRAMES, "noframes", VERS_ELEM_NOFRAMES, &TY_(W3CAttrsFor_NOFRAMES)[0], (CM_BLOCK|CM_FRAMES), TY_(ParseNoFrames), NULL }, { TidyTag_NOSCRIPT, "noscript", VERS_ELEM_NOSCRIPT, &TY_(W3CAttrsFor_NOSCRIPT)[0], (CM_HEAD|CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL },