Issue #70 - Initial implementation of SVG support.

An immense thanks to Ger Hobbelt who had already done this
in his github.com/GerHobbelt/htmltidy fork.

The two sources have diverged, so this was not a simple cut
and paste. But again, thanks Ger for this.
This commit is contained in:
Geoff McLane 2015-02-02 17:25:49 +01:00
parent 201f3cb49e
commit 885c7caab7
9 changed files with 298 additions and 2 deletions

View file

@ -423,6 +423,7 @@ typedef enum
TidyTag_STYLE, /**< STYLE */
TidyTag_SUB, /**< SUB */
TidyTag_SUP, /**< SUP */
TidyTag_SVG, /**< SVG (HTML5) */
TidyTag_TABLE, /**< TABLE */
TidyTag_TBODY, /**< TBODY */
TidyTag_TD, /**< TD */
@ -783,8 +784,15 @@ typedef enum
TidyAttr_ARIA_VALUENOW,
TidyAttr_ARIA_VALUETEXT,
/* SVG attributes (SVG 1.1) */
TidyAttr_X, /**< X= */
TidyAttr_Y, /**< Y= */
TidyAttr_VIEWBOX, /**< VIEWBOX= */
TidyAttr_PRESERVEASPECTRATIO, /**< PRESERVEASPECTRATIO= */
TidyAttr_ZOOMANDPAN, /**< ZOOMANDPAN= */
TidyAttr_BASEPROFILE, /**< BASEPROFILE= */
TidyAttr_CONTENTSCRIPTTYPE, /**< CONTENTSCRIPTTYPE= */
TidyAttr_CONTENTSTYLETYPE, /**< CONTENTSTYLETYPE= */
N_TIDY_ATTRIBS /**< Must be last */
} TidyAttrId;

View file

@ -11693,6 +11693,41 @@ const AttrVersion TY_(W3CAttrsFor_SUP)[] =
{ TidyAttr_UNKNOWN, 0 },
};
/*
  Attribute table for the <svg> element.
  Each entry pairs an attribute id with an OR-ed mask of version flags
  (presumably the document types in which the attribute is accepted on
  <svg>; 'xxxx' looks like the placeholder for "not in this version" --
  confirm against the flag definitions).
  The { TidyAttr_UNKNOWN, 0 } entry terminates the list and must stay last.
*/
const AttrVersion TY_(W3CAttrsFor_SVG)[] =
{
{ TidyAttr_ALIGN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_CLASS, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_DIR, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_ID, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_HEIGHT, xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50 },
{ TidyAttr_LANG, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|xxxx|xxxx|HT50|XH50 },
{ TidyAttr_OnCLICK, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnDBLCLICK, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnKEYDOWN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnKEYPRESS, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnKEYUP, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnMOUSEDOWN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnMOUSEMOVE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnMOUSEOUT, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnMOUSEOVER, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_OnMOUSEUP, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_STYLE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_TITLE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_WIDTH, xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50 },
{ TidyAttr_XML_LANG, xxxx|xxxx|xxxx|xxxx|X10T|xxxx|xxxx|X10F|xxxx|xxxx|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_XMLNS, xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx|HT50|XH50 },
/* SVG-specific attributes */
{ TidyAttr_X, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_Y, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_VIEWBOX, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_PRESERVEASPECTRATIO, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_ZOOMANDPAN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_VERSION, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_BASEPROFILE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_CONTENTSCRIPTTYPE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
{ TidyAttr_CONTENTSTYLETYPE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50 },
/* end-of-table sentinel */
{ TidyAttr_UNKNOWN, 0 },
};
const AttrVersion TY_(W3CAttrsFor_TABLE)[] =
{
{ TidyAttr_ACCESSKEY, xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50|XH50 },

View file

@ -103,6 +103,7 @@ extern const AttrVersion TY_(W3CAttrsFor_STRONG)[];
extern const AttrVersion TY_(W3CAttrsFor_STYLE)[];
extern const AttrVersion TY_(W3CAttrsFor_SUB)[];
extern const AttrVersion TY_(W3CAttrsFor_SUP)[];
extern const AttrVersion TY_(W3CAttrsFor_SVG)[];
extern const AttrVersion TY_(W3CAttrsFor_TABLE)[];
extern const AttrVersion TY_(W3CAttrsFor_TBODY)[];
extern const AttrVersion TY_(W3CAttrsFor_TD)[];

View file

@ -397,6 +397,24 @@ static const Attribute attribute_defs [] =
{ TidyAttr_ARIA_VALUENOW, "aria-valuenow", CH_PCDATA },
{ TidyAttr_ARIA_VALUETEXT, "aria-valuetext", CH_PCDATA },
{ TidyAttr_X, "x", CH_PCDATA }, /* for <svg> */
{ TidyAttr_Y, "y", CH_PCDATA }, /* for <svg> */
#if 0 /* with uppercase chars taken directly from W3C; are these case-insensitive everywhere? */
{ TidyAttr_VIEWBOX, "viewBox", VERS_INLINE_SVG, CH_PCDATA }, /* for <svg> */
{ TidyAttr_PRESERVEASPECTRATIO, "preserveAspectRatio", VERS_INLINE_SVG, CH_PCDATA }, /* for <svg> */
{ TidyAttr_ZOOMANDPAN, "zoomAndPan", VERS_INLINE_SVG, CH_PCDATA }, /* for <svg> */
{ TidyAttr_BASEPROFILE, "baseProfile", VERS_INLINE_SVG, CH_PCDATA }, /* for <svg> */
{ TidyAttr_CONTENTSCRIPTTYPE, "contentScriptType", VERS_INLINE_SVG, CH_PCDATA }, /* for <svg> */
{ TidyAttr_CONTENTSTYLETYPE, "contentStyleType", VERS_INLINE_SVG, CH_PCDATA }, /* for <svg> */
#else
{ TidyAttr_VIEWBOX, "viewbox", CH_PCDATA }, /* for <svg> */
{ TidyAttr_PRESERVEASPECTRATIO, "preserveaspectratio", CH_PCDATA }, /* for <svg> */
{ TidyAttr_ZOOMANDPAN, "zoomandpan", CH_PCDATA }, /* for <svg> */
{ TidyAttr_BASEPROFILE, "baseprofile", CH_PCDATA }, /* for <svg> */
{ TidyAttr_CONTENTSCRIPTTYPE, "contentscripttype", CH_PCDATA }, /* for <svg> */
{ TidyAttr_CONTENTSTYLETYPE, "contentstyletype", CH_PCDATA }, /* for <svg> */
#endif
/* this must be the final entry */
{ N_TIDY_ATTRIBS, NULL, NULL }
};

View file

@ -525,6 +525,7 @@ typedef enum
MixedContent,
Preformatted,
IgnoreMarkup,
OtherNamespace,
CdataContent
} GetTokenMode;

View file

@ -1359,6 +1359,179 @@ void TY_(ParseBlock)( TidyDocImpl* doc, Node *element, GetTokenMode mode)
#endif
}
/* [i_a] svg / math */
/*
  Search state handed to FindDescendant_cb() through the traversal's
  'propagate' pointer. The 'input' fields describe what to look for; the
  remaining fields receive the result.
*/
struct MatchingDescendantData
{
/* output: the first visited node that matched, or NULL when none did */
Node *found_node;
/* output, optional (may be NULL): when non-NULL, the referenced flag is set
   to yes once the traversal visits marker_node without matching there */
Bool *passed_marker_node;
/* input: */
/* tag id a visited node must have to be considered a match */
TidyTagId matching_tagId;
/* node being matched; its element name is compared as well when
   matching_tagId is TidyTag_UNKNOWN */
Node *node_to_find;
/* node whose visit is reported through passed_marker_node */
Node *marker_node;
};
/*
  Traversal callback used by FindMatchingDescendant().

  'propagate' points at a struct MatchingDescendantData. A visited node
  matches when its tag id equals the requested one; for TidyTag_UNKNOWN the
  element names must additionally compare equal. On a match the node is
  stored in found_node and the traversal is stopped. Otherwise the marker
  flag is raised when this node is the marker node, and the traversal is
  told to move on to the parent. 'doc' is not used.
*/
static NodeTraversalSignal FindDescendant_cb(TidyDocImpl* ARG_UNUSED(doc), Node* node, void *propagate)
{
    struct MatchingDescendantData *search = (struct MatchingDescendantData *)propagate;

    if (TagId(node) == search->matching_tagId)
    {
        if (search->matching_tagId != TidyTag_UNKNOWN)
        {
            /* known tag: equal ids are sufficient */
            search->found_node = node;
            return ExitTraversal;
        }

        /* make sure we match up 'unknown' tags exactly: compare the names */
        if (node->element != NULL &&
            search->node_to_find != NULL &&
            search->node_to_find->element != NULL &&
            TY_(tmbstrcmp)(search->node_to_find->element, node->element) == 0)
        {
            search->found_node = node;
            return ExitTraversal;
        }
    }

    /* no match at this node: note when the marker has been visited */
    if (search->passed_marker_node != NULL && search->marker_node == node)
    {
        *search->passed_marker_node = yes;
    }

    return VisitParent;
}
/*
  Search the parent chain (from 'parent' upwards up to the root) for a node
  matching the given 'node'. Despite the function's name, the nodes examined
  are 'parent' and its ancestors, not descendants.

  A node matches when it has the same tag id as 'node'; for unknown tags the
  element names must be equal too (see FindDescendant_cb()).

  When the search passes beyond the 'marker_node' (which is assumed to sit in
  the parent chain), this will be flagged by setting the boolean referenced by
  'is_parent_of_marker' to yes; the boolean is reset to no before the search
  starts. A match found AT the marker node itself does not set the flag.

  'is_parent_of_marker' and 'marker_node' are optional parameters and may be
  NULL. 'node' must not be NULL.

  Returns the matching node, or NULL when the chain holds no match.
*/
static Node *FindMatchingDescendant( Node *parent, Node *node, Node *marker_node, Bool *is_parent_of_marker )
{
    struct MatchingDescendantData cb_data = { 0 };

    assert(node);

    cb_data.matching_tagId = TagId(node);
    cb_data.node_to_find = node;
    cb_data.marker_node = marker_node;
    /* hand the caller's flag to the callback; without this the callback has
       nowhere to record that the marker node was passed */
    cb_data.passed_marker_node = is_parent_of_marker;

    if (is_parent_of_marker)
        *is_parent_of_marker = no;

    /* the callback ignores the document argument, hence NULL */
    TY_(TraverseNodeTree)(NULL, parent, FindDescendant_cb, &cb_data);
    return cb_data.found_node;
}
/*
  Act as a generic XML (sub)tree parser: collect each node and add it to the
  DOM below 'basenode', without any further validation.

  doc      - document being parsed; tokens are read from its lexer
  basenode - the element (e.g. <svg>) whose content is collected
  mode     - ignored: tokens are always fetched in OtherNamespace mode

  Start tags open a new nesting level, end tags close the matching open
  element (closing any still-open elements in between), and every other
  token is appended to the current parent as is.

  The function returns when the end tag for 'basenode' has been consumed,
  when an end tag for an element enclosing 'basenode' shows up (that token is
  pushed back for the caller), or when the input is exhausted (reported as a
  missing end tag for 'basenode').

  TODO : add schema- or other-hierarchy-definition-based validation of the subtree here...
*/
void TY_(ParseNamespace)(TidyDocImpl* doc, Node *basenode, GetTokenMode mode)
{
    Lexer* lexer = doc->lexer;
    Node *node;
    Node *parent = basenode;
    uint istackbase;

    /* a la <table>: defer popping elements off the inline stack */
    TY_(DeferDup)( doc );
    istackbase = lexer->istackbase;
    lexer->istackbase = lexer->istacksize;

    mode = OtherNamespace; /* Preformatted; IgnoreWhitespace; */

    while ((node = TY_(GetToken)(doc, mode)) != NULL)
    {
        /*
          fix check to skip action in InsertMisc for regular/empty
          nodes, which we don't want here...

          The way we do it here is by checking and processing everything
          and only what remains goes into InsertMisc()
        */

        /* is this a close tag? And does it match the current parent node? */
        if (node->type == EndTag)
        {
            /*
              to prevent end tags flowing from one 'alternate namespace' we
              check this in two phases: first we check if the tag is a
              descendant of the current node, and when it is, we check whether
              it is the end tag for a node /within/ or /outside/ the basenode.
            */
            Bool outside;
            Node *mp = FindMatchingDescendant(parent, node, basenode, &outside);

            if (mp != NULL)
            {
                /*
                  when mp != parent as we might expect,
                  infer end tags until we 'hit' the matched
                  parent or the basenode
                */
                Node *n;

                for (n = parent;
                     n != NULL && n != basenode->parent && n != mp;
                     n = n->parent)
                {
                    /* n->implicit = yes; */
                    n->closed = yes;
                    TY_(ReportError)(doc, n->parent, n, MISSING_ENDTAG_BEFORE);
                }

                /*
                  Leaving the loop at basenode->parent (or running off the
                  chain) means the matched element is not inside the basenode
                  subtree, whatever the search reported: never treat that as
                  an inner end tag, or parsing would continue outside the
                  subtree.
                */
                if (n == NULL || n == basenode->parent)
                    outside = yes;

                assert(outside == no ? n == mp : 1);
                assert(outside == yes ? n == basenode->parent : 1);

                if (outside == no)
                {
                    /* EndTag for a node within the basenode subtree. Roll on... */
                    n->closed = yes;
                    TY_(FreeNode)(doc, node);

                    node = n;
                    parent = node->parent;
                }
                else
                {
                    /* EndTag for a node outside the basenode subtree: let the caller handle that. */
                    TY_(UngetToken)( doc );
                    node = basenode;
                    parent = node->parent;
                }

                /* when we've arrived at the end-node for the base node, it's quitting time */
                if (node == basenode)
                {
                    lexer->istackbase = istackbase;
                    assert(basenode->closed == yes);
                    return;
                }
            }
            else
            {
                /*
                  unmatched close tag: report an error and discard.
                  (Tag pointers are deliberately not compared here:
                  differently named unknown elements presumably carry the
                  same tag value, so only the name-aware search above is
                  authoritative.)
                */
                assert(parent);
                TY_(ReportError)(doc, parent, node, NON_MATCHING_ENDTAG);
                TY_(ReportError)(doc, parent, node, DISCARDING_UNEXPECTED);

                /* the token is not inserted anywhere, so release it here */
                TY_(FreeNode)(doc, node);
            }
        }
        else if (node->type == StartTag)
        {
            /* add another child to the current parent */
            TY_(InsertNodeAtEnd)(parent, node);
            parent = node;
        }
        else
        {
            /* anything else goes in as a leaf of the current parent */
            TY_(InsertNodeAtEnd)(parent, node);
        }
    }

    /* ran out of input before the end tag for basenode was seen */
    TY_(ReportError)(doc, basenode->parent, basenode, MISSING_ENDTAG_FOR);

    /* restore the inline stack base on this exit as well */
    lexer->istackbase = istackbase;
}
void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
{
#if !defined(NDEBUG) && defined(_MSC_VER)

View file

@ -108,6 +108,7 @@ static CheckAttribs CheckHTML;
#define VERS_ELEM_STYLE (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50)
#define VERS_ELEM_SUB (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50)
#define VERS_ELEM_SUP (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50)
#define VERS_ELEM_SVG (xxxx|xxxx|xxxx|H41T|X10T|xxxx|H41F|X10F|xxxx|H41S|X10S|XH11|xxxx|HT50|XH50)
#define VERS_ELEM_TABLE (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50)
#define VERS_ELEM_TBODY (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50)
#define VERS_ELEM_TD (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50)
@ -247,6 +248,7 @@ static const Dict tag_defs[] =
{ TidyTag_STYLE, "style", VERS_ELEM_STYLE, &TY_(W3CAttrsFor_STYLE)[0], (CM_HEAD|CM_BLOCK), TY_(ParseScript), NULL },
{ TidyTag_SUB, "sub", VERS_ELEM_SUB, &TY_(W3CAttrsFor_SUB)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_SUP, "sup", VERS_ELEM_SUP, &TY_(W3CAttrsFor_SUP)[0], (CM_INLINE), TY_(ParseInline), NULL },
{ TidyTag_SVG, "svg", VERS_ELEM_SVG, &TY_(W3CAttrsFor_SVG)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseNamespace),NULL },
{ TidyTag_TABLE, "table", VERS_ELEM_TABLE, &TY_(W3CAttrsFor_TABLE)[0], (CM_BLOCK), TY_(ParseTableTag), CheckTABLE },
{ TidyTag_TBODY, "tbody", VERS_ELEM_TBODY, &TY_(W3CAttrsFor_TBODY)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL },
{ TidyTag_TD, "td", VERS_ELEM_TD, &TY_(W3CAttrsFor_TD)[0], (CM_ROW|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
@ -949,6 +951,41 @@ uint TY_(nodeHeaderLevel)( Node* node )
return 0;
}
/* [i_a] generic node tree traversal; see also <tidy-int.h> */
/*
  Walk the node tree starting at 'node', calling 'cb' for every node visited
  and steering the walk by the signal the callback returns:

    ContinueTraversal       - descend into the node's children, then go on
                              with its next sibling
    SkipChildren            - go on with the next sibling without descending
    SkipSiblings            - descend into the node's children, then stop
                              walking this sibling list
    SkipChildrenAndSiblings - stop walking this sibling list without
                              descending
    VisitParent             - continue with the node's parent (the callback
                              is invoked for the parent next)
    ExitTraversal           - abort the whole traversal immediately

  'doc' and 'propagate' are passed through to the callback untouched.

  Returns ExitTraversal when a callback aborted the traversal, and
  ContinueTraversal otherwise.
*/
NodeTraversalSignal TY_(TraverseNodeTree)(TidyDocImpl* doc, Node* node, NodeTraversalCallBack *cb, void *propagate )
{
    while (node)
    {
        NodeTraversalSignal s = (*cb)(doc, node, propagate);

        if (node->content && (s == ContinueTraversal || s == SkipSiblings))
        {
            /*
              Only an abort request propagates out of the child traversal.
              The callback's own signal is kept, so that SkipSiblings still
              ends this sibling list after the children have been visited.
            */
            if (TY_(TraverseNodeTree)(doc, node->content, cb, propagate) == ExitTraversal)
                return ExitTraversal;
        }

        switch (s)
        {
        case ExitTraversal:
            return ExitTraversal;

        case VisitParent:
            node = node->parent;
            continue;

        case SkipSiblings:
        case SkipChildrenAndSiblings:
            return ContinueTraversal;

        default:
            node = node->next;
            break;
        }
    }
    return ContinueTraversal;
}
/*
* local variables:
* mode: c

View file

@ -111,6 +111,7 @@ Parser TY_(ParseSelect);
Parser TY_(ParseOptGroup);
Parser TY_(ParseText);
Parser TY_(ParseDatalist);
Parser TY_(ParseNamespace);
CheckAttribs TY_(CheckAttributes);

View file

@ -121,4 +121,26 @@ TidyOption tidyImplToOption( const TidyOptionImpl* option );
int TY_(DocParseStream)( TidyDocImpl* impl, StreamIn* in );
/*
[i_a] generic node tree traversal code; used in several spots.
Define your own callback, which returns one of the NodeTraversalSignal values
to instruct the tree traversal routine TraverseNodeTree() what to do.
Pass custom data to/from the callback using the 'propagate' reference.
*/
typedef enum
{
ContinueTraversal, /* visit this node's children, then its siblings */
SkipChildren, /* visit siblings of this node; ignore its children */
SkipSiblings, /* traverse this node's children, but ignore its subsequent siblings and their children */
SkipChildrenAndSiblings, /* ignore this node's children as well as its subsequent siblings */
VisitParent, /* REVERSE traversal: visit the parent of the current node */
ExitTraversal /* terminate traversal on the spot */
} NodeTraversalSignal;
/* Callback invoked for every visited node; 'doc' and 'propagate' are the
   values that were given to TraverseNodeTree(). */
typedef NodeTraversalSignal NodeTraversalCallBack(TidyDocImpl* doc, Node* node, void *propagate);
/* Traverse the tree starting at 'node', calling 'cb' per visited node.
   Returns ExitTraversal when a callback terminated the traversal,
   ContinueTraversal otherwise. */
NodeTraversalSignal TY_(TraverseNodeTree)(TidyDocImpl* doc, Node* node, NodeTraversalCallBack *cb, void *propagate);
#endif /* __TIDY_INT_H__ */