Initial cut at RDFa support (again)

New branch that implements support for RDFa attributes.  It should be
cleaner than my first attempt in PR #299.  Also references issue #209.
This commit is contained in:
Shane McCarron 2015-11-16 11:29:23 -06:00
parent 2bbc751599
commit c0b769c5c7
2 changed files with 145 additions and 0 deletions

View file

@ -802,6 +802,16 @@ typedef enum
/* MathML <math> attributes */
TidyAttr_DISPLAY, /**< DISPLAY= (html5) */
/* RDFa global attributes */
TidyAttr_ABOUT, /**< ABOUT= */
TidyAttr_DATATYPE, /**< DATATYPE= */
TidyAttr_INLIST, /**< INLIST= */
TidyAttr_PREFIX, /**< PREFIX= */
TidyAttr_PROPERTY, /**< PROPERTY= */
TidyAttr_RESOURCE, /**< RESOURCE= */
TidyAttr_TYPEOF, /**< TYPEOF= */
TidyAttr_VOCAB, /**< VOCAB= */
N_TIDY_ATTRIBS /**< Must be last */
} TidyAttrId;

View file

@ -47,6 +47,9 @@ static AttrCheck CheckScroll;
static AttrCheck CheckTextDir;
static AttrCheck CheckLang;
static AttrCheck CheckType;
static AttrCheck CheckRDFaSafeCURIE;
static AttrCheck CheckRDFaTerm;
static AttrCheck CheckRDFaPrefix;
#define CH_PCDATA NULL
#define CH_CHARSET NULL
@ -85,6 +88,11 @@ static AttrCheck CheckType;
#define CH_TARGET CheckTarget
#define CH_VTYPE CheckVType
#define CH_ACTION CheckAction
/* RDFa attribute checkers.  The plural aliases (CH_RDFASCURIES,
 * CH_RDFATERMS) currently resolve to the same checker function as
 * their singular counterparts. */
#define CH_RDFAPREFIX CheckRDFaPrefix
#define CH_RDFASCURIE CheckRDFaSafeCURIE
#define CH_RDFASCURIES CheckRDFaSafeCURIE
#define CH_RDFATERM CheckRDFaTerm
#define CH_RDFATERMS CheckRDFaTerm
static const Attribute attribute_defs [] =
{
@ -417,6 +425,16 @@ static const Attribute attribute_defs [] =
#endif
{ TidyAttr_DISPLAY, "display", CH_PCDATA }, /* on MATH tag (html5) */
/* RDFa Attributes */
{ TidyAttr_ABOUT, "about", CH_RDFASCURIE },
{ TidyAttr_DATATYPE, "datatype", CH_RDFATERM },
{ TidyAttr_INLIST, "inlist", CH_BOOL },
{ TidyAttr_PREFIX, "prefix", CH_RDFAPREFIX },
{ TidyAttr_PROPERTY, "property", CH_RDFATERMS },
{ TidyAttr_RESOURCE, "resource", CH_RDFASCURIE },
{ TidyAttr_TYPEOF, "typeof", CH_RDFATERMS },
{ TidyAttr_VOCAB, "vocab", CH_URL },
/* this must be the final entry */
{ N_TIDY_ATTRIBS, NULL, NULL }
};
@ -432,6 +450,31 @@ static uint AttributeVersions(Node* node, AttVal* attval)
return (XH50 | HT50);
if (strcmp(attval->attribute,"allowfullscreen") == 0)
return (XH50 | HT50);
/* RDFa global attributes */
if (strcmp(attval->attribute,"about") == 0)
return (XH50 | HT50);
if (strcmp(attval->attribute,"datatype") == 0)
return (XH50 | HT50);
if (strcmp(attval->attribute,"inlist") == 0)
return (XH50 | HT50);
if (strcmp(attval->attribute,"prefix") == 0)
return (XH50 | HT50);
if (strcmp(attval->attribute,"property") == 0)
return (XH50 | HT50);
if (strcmp(attval->attribute,"resource") == 0)
return (XH50 | HT50);
if (strcmp(attval->attribute,"typeof") == 0)
return (XH50 | HT50);
if (strcmp(attval->attribute,"vocab") == 0)
return (XH50 | HT50);
/* Override the settings on these attributes because
* they are allowed everywhere by RDFa */
if (strcmp(attval->attribute,"content") == 0)
return (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50) ;
if (strcmp(attval->attribute,"rel") == 0)
return (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50) ;
if (strcmp(attval->attribute,"rev") == 0)
return (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50|XH50) ;
}
/* TODO: maybe this should return VERS_PROPRIETARY instead? */
if (!attval || !attval->dict)
@ -2153,6 +2196,98 @@ AttVal *SortAttVal( AttVal *list, TidyAttrSortStrategy strat)
}
}
/* RDFa support checkers
*
*/
/* CheckRDFaPrefix - ensure the prefix attribute value is
 * correct
 *
 * @prefix takes prefix value pairs in the form:
 *
 * NCName ':' ' '+ AnyURI
 *
 * The value is split on spaces (a run of spaces counts as a single
 * separator) and the pieces are treated alternately as a prefix and
 * a URI, starting with a prefix.  A prefix piece must contain a
 * colon, and its first colon must be its last character; otherwise
 * BAD_ATTRIBUTE_VALUE is reported for that piece.  URI pieces are
 * skipped without being checked.  An attribute without a value is
 * reported as MISSING_ATTR_VALUE.
 *
 * The value is scanned in place.  No scratch copy is allocated and
 * strtok() is not used, so the attribute value is never modified
 * and no hidden static tokenizer state is involved.
 */
void CheckRDFaPrefix ( TidyDocImpl* doc, Node *node, AttVal *attval)
{
    tmbstr cur;
    /* isPrefix toggles - 1 while the next piece should be a prefix,
     * 0 while the next piece should be a URI */
    uint isPrefix = 1;

    if (!AttrHasValue(attval))
    {
        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
        return;
    }

    if (!attval->value)
        return;

    cur = attval->value;
    for (;;)
    {
        /* first ':' seen inside the current piece, if any */
        tmbstr firstColon = NULL;

        /* skip the separator(s) in front of the next piece */
        while (*cur == ' ')
            cur++;
        if (*cur == '\0')
            break;

        /* walk to the end of the piece, remembering its first colon */
        while (*cur != '\0' && *cur != ' ')
        {
            if (*cur == ':' && firstColon == NULL)
                firstColon = cur;
            cur++;
        }

        /* prefix rules are that it can have any character except a
         * colon - that one must be at the end; cur now points one
         * past the last character of the piece */
        if (isPrefix && (firstColon == NULL || firstColon != cur - 1))
            TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);

        isPrefix = !isPrefix;
    }
}
/* CheckRDFaTerm - checker for RDFa term attributes
 *
 * Reports MISSING_ATTR_VALUE when the attribute has no value.
 * The content of the value is not inspected.
 */
void CheckRDFaTerm ( TidyDocImpl* doc, Node *node, AttVal *attval)
{
    /* a present value is all that is required here */
    if (AttrHasValue(attval))
        return;

    TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
}
/* CheckRDFaSafeCURIE - checker for RDFa CURIE-valued attributes
 *
 * Reports MISSING_ATTR_VALUE when the attribute has no value.
 * The CURIE syntax itself is not examined.
 */
void CheckRDFaSafeCURIE ( TidyDocImpl* doc, Node *node, AttVal *attval)
{
    if (AttrHasValue(attval))
    {
        /* value present - nothing further is checked */
    }
    else
    {
        TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
    }
}
/*
* local variables:
* mode: c