Issue #130 - MathML attr and entity fix!

This is a set of kludgy fixes for MathML attribute and entity support.

It is intended that a full HTML5 entity table be added at some time, but
at present ALL entities are accepted as written when within the math
element.

Likewise all attributes are accepted on MathML elements without any check
of their name or value, even if they match attributes outside MathML.

And in the pprinter such entities are written as-is from the lexer, via a
newly added PPrintMathML service, which uses the new mode OtherNamespace.

It is hoped all these fixes will NOT affect tidy outside the math element.

ALL fixes in the set are clearly marked '#130 MathML attr and entity fix!'
for easy searching, and improving if possible.
This commit is contained in:
Geoff McLane 2015-02-22 18:44:14 +01:00
parent 84f1f4351b
commit 0aa81eb256
4 changed files with 48 additions and 4 deletions

View file

@ -949,9 +949,20 @@ static void ParseEntity( TidyDocImpl* doc, GetTokenMode mode )
&& !cfgBool(doc, TidyXhtmlOut) )
TY_(ReportEntityError)( doc, APOS_UNDEFINED, lexer->lexbuf+start, 39 );
/* Lookup entity code and version
*/
found = TY_(EntityInfo)( lexer->lexbuf+start, isXml, &ch, &entver );
if (( mode == OtherNamespace ) && ( c == ';' ))
{
/* #130 MathML attr and entity fix! */
found = yes;
ch = 255;
entver = XH50|HT50;
preserveEntities = yes;
}
else
{
/* Lookup entity code and version
*/
found = TY_(EntityInfo)( lexer->lexbuf+start, isXml, &ch, &entver );
}
/* deal with unrecognized or invalid entities */
/* #433012 - fix by Randy Waki 17 Feb 01 */

View file

@ -1445,6 +1445,7 @@ void TY_(ParseNamespace)(TidyDocImpl* doc, Node *basenode, GetTokenMode mode)
Node *node;
Node *parent = basenode;
uint istackbase;
AttVal* av; /* #130 MathML attr and entity fix! */
/* a la <table>: defer popping elements off the inline stack */
TY_(DeferDup)( doc );
@ -1531,12 +1532,24 @@ void TY_(ParseNamespace)(TidyDocImpl* doc, Node *basenode, GetTokenMode mode)
}
else if (node->type == StartTag)
{
/* #130 MathML attr and entity fix!
care if it has attributes, and 'accidently' any of those attributes match known */
for ( av = node->attributes; av; av = av->next )
{
av->dict = 0; /* does something need to be freed? */
}
/* add another child to the current parent */
TY_(InsertNodeAtEnd)(parent, node);
parent = node;
}
else
{
/* #130 MathML attr and entity fix!
care if it has attributes, and 'accidently' any of those attributes match known */
for ( av = node->attributes; av; av = av->next )
{
av->dict = 0; /* does something need to be freed? */
}
TY_(InsertNodeAtEnd)(parent, node);
}
}

View file

@ -748,7 +748,8 @@ static void PPrintChar( TidyDocImpl* doc, uint c, uint mode )
for XML where naked '&' are illegal.
*/
if ( c == '&' && cfgBool(doc, TidyQuoteAmpersand)
&& !cfgBool(doc, TidyPreserveEntities) )
&& !cfgBool(doc, TidyPreserveEntities)
&& ( mode != OtherNamespace) ) /* #130 MathML attr and entity fix! */
{
AddString( pprint, "&amp;" );
return;
@ -1955,6 +1956,21 @@ void TY_(PrintBody)( TidyDocImpl* doc )
}
}
/* #130 MathML attr and entity fix!
   Support the MathML namespace: print the element's start tag, then each
   of its children in order, then its end tag.  The print mode handed to
   the tag printers and to every child is always OtherNamespace. */
static void PPrintMathML( TidyDocImpl* doc, uint indent, Node *node )
{
    const uint nsMode = OtherNamespace;
    Node *child;

    PPrintTag( doc, nsMode, indent, node );

    /* The child list is read only after the start tag has been printed. */
    child = node->content;
    while ( child )
    {
        TY_(PPrintTree)( doc, nsMode, indent, child );
        child = child->next;
    }

    PPrintEndTag( doc, nsMode, indent, node );
}
void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
{
Node *content, *last;
@ -1993,6 +2009,8 @@ void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
PPrintJste( doc, indent, node );
else if ( node->type == PhpTag)
PPrintPhp( doc, indent, node );
else if ( nodeIsMATHML(node) )
PPrintMathML( doc, indent, node ); /* #130 MathML attr and entity fix! */
else if ( TY_(nodeCMIsEmpty)(node) ||
(node->type == StartEndTag && !xhtml) )
{

View file

@ -234,6 +234,8 @@ uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */
/* HTML5 */
#define nodeIsDATALIST( node ) TagIsId( node, TidyTag_DATALIST )
#define nodeIsMATHML( node ) TagIsId( node, TidyTag_MATHML ) /* #130 MathML attr and entity fix! */
/* NOT in HTML 5 */
#define nodeIsACRONYM( node ) TagIsId( node, TidyTag_ACRONYM )
#define nodesIsFRAME( node ) TagIsId( node, TidyTag_FRAME )