From 0aa81eb256dae0777df7055ba07eb6a0f14687ae Mon Sep 17 00:00:00 2001 From: Geoff McLane Date: Sun, 22 Feb 2015 18:44:14 +0100 Subject: [PATCH] Issue #130 - MathML attr and entity fix! This is a set of kludgy fixes for MathML attribute and entities support. It is intended that a full HTML5 entity table be added at some time, but at present ALL entities are accepted as written when within the math element. Likewise all attributes are accepted on MathML elements without any check of their name or value, even if they match attributes outside MathML. And in the pprinter such entities are written as is from the lexer, by a newly added PPrintMathML service, using the new mode OtherNamespace. It is hoped all these fixes will NOT affect tidy outside the math element. ALL fixes in the set are clearly marked '#130 MathML attr and entity fix!' for easy searching, and improving if possible. --- src/lexer.c | 17 ++++++++++++++--- src/parser.c | 13 +++++++++++++ src/pprint.c | 20 +++++++++++++++++++- src/tags.h | 2 ++ 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index 357c958..ef09bd0 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -949,9 +949,20 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode ) && !cfgBool(doc, TidyXhtmlOut) ) TY_(ReportEntityError)( doc, APOS_UNDEFINED, lexer->lexbuf+start, 39 ); - /* Lookup entity code and version - */ - found = TY_(EntityInfo)( lexer->lexbuf+start, isXml, &ch, &entver ); + if (( mode == OtherNamespace ) && ( c == ';' )) + { + /* #130 MathML attr and entity fix! 
*/ + found = yes; + ch = 255; + entver = XH50|HT50; + preserveEntities = yes; + } + else + { + /* Lookup entity code and version + */ + found = TY_(EntityInfo)( lexer->lexbuf+start, isXml, &ch, &entver ); + } /* deal with unrecognized or invalid entities */ /* #433012 - fix by Randy Waki 17 Feb 01 */ diff --git a/src/parser.c b/src/parser.c index fa4c511..d00532f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1445,6 +1445,7 @@ void TY_(ParseNamespace)(TidyDocImpl* doc, Node *basenode, GetTokenMode mode) Node *node; Node *parent = basenode; uint istackbase; + AttVal* av; /* #130 MathML attr and entity fix! */ /* a la : defer popping elements off the inline stack */ TY_(DeferDup)( doc ); @@ -1531,12 +1532,24 @@ void TY_(ParseNamespace)(TidyDocImpl* doc, Node *basenode, GetTokenMode mode) } else if (node->type == StartTag) { + /* #130 MathML attr and entity fix! + care if it has attributes, and 'accidently' any of those attributes match known */ + for ( av = node->attributes; av; av = av->next ) + { + av->dict = 0; /* does something need to be freed? */ + } /* add another child to the current parent */ TY_(InsertNodeAtEnd)(parent, node); parent = node; } else { + /* #130 MathML attr and entity fix! + care if it has attributes, and 'accidently' any of those attributes match known */ + for ( av = node->attributes; av; av = av->next ) + { + av->dict = 0; /* does something need to be freed? */ + } TY_(InsertNodeAtEnd)(parent, node); } } diff --git a/src/pprint.c b/src/pprint.c index 4f43db4..d4459e9 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -748,7 +748,8 @@ static void PPrintChar( TidyDocImpl* doc, uint c, uint mode ) for XML where naked '&' are illegal. */ if ( c == '&' && cfgBool(doc, TidyQuoteAmpersand) - && !cfgBool(doc, TidyPreserveEntities) ) + && !cfgBool(doc, TidyPreserveEntities) + && ( mode != OtherNamespace) ) /* #130 MathML attr and entity fix! 
*/ { AddString( pprint, "&" ); return; @@ -1955,6 +1956,21 @@ void TY_(PrintBody)( TidyDocImpl* doc ) } } +/* #130 MathML attr and entity fix! + Support MathML namespace */ +static void PPrintMathML( TidyDocImpl* doc, uint indent, Node *node ) +{ + Node *content; + uint mode = OtherNamespace; + + PPrintTag( doc, mode, indent, node ); + + for ( content = node->content; content; content = content->next ) + TY_(PPrintTree)( doc, mode, indent, content ); + + PPrintEndTag( doc, mode, indent, node ); +} + void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node ) { Node *content, *last; @@ -1993,6 +2009,8 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node ) PPrintJste( doc, indent, node ); else if ( node->type == PhpTag) PPrintPhp( doc, indent, node ); + else if ( nodeIsMATHML(node) ) + PPrintMathML( doc, indent, node ); /* #130 MathML attr and entity fix! */ else if ( TY_(nodeCMIsEmpty)(node) || (node->type == StartEndTag && !xhtml) ) { diff --git a/src/tags.h b/src/tags.h index 8dc19a0..8f0ca57 100644 --- a/src/tags.h +++ b/src/tags.h @@ -234,6 +234,8 @@ uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */ /* HTML5 */ #define nodeIsDATALIST( node ) TagIsId( node, TidyTag_DATALIST ) +#define nodeIsMATHML( node ) TagIsId( node, TidyTag_MATHML ) /* #130 MathML attr and entity fix! */ + /* NOT in HTML 5 */ #define nodeIsACRONYM( node ) TagIsId( node, TidyTag_ACRONYM ) #define nodesIsFRAME( node ) TagIsId( node, TidyTag_FRAME )