Use a hash table for anchors #64

This commit is contained in:
Jim Derry 2014-11-22 19:39:06 +08:00
commit edb185a308
4 changed files with 46 additions and 21 deletions

View file

@ -941,20 +941,33 @@ static void FreeAnchor(TidyDocImpl* doc, Anchor *a)
TidyDocFree( doc, a );
}
/* Map an anchor name to a bucket index in the range [0, ANCHOR_HASH_SIZE).
** Every character is passed through TY_(ToLower) before it is mixed into
** the hash, so the bucket is derived from the lower-cased form of the name.
** A NULL name is not dereferenced; it hashes to bucket 0. Callers hand in
** attribute value pointers directly, so this guard keeps a missing value
** from crashing the lookup.
*/
static uint anchorNameHash(ctmbstr s)
{
    uint hashval = 0;

    if (s)
    {
        /* multiplicative string hash: h = c + 31*h for each character */
        for ( ; *s != '\0'; s++)
        {
            tmbchar c = TY_(ToLower)( *s );
            hashval = c + 31*hashval;
        }
    }
    return hashval % ANCHOR_HASH_SIZE;
}
/* removes anchor for specific node */
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node )
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, ctmbstr name, Node *node )
{
TidyAttribImpl* attribs = &doc->attribs;
Anchor *delme = NULL, *curr, *prev = NULL;
uint h = anchorNameHash(name);
for ( curr=attribs->anchor_list; curr!=NULL; curr=curr->next )
for ( curr=attribs->anchor_hash[h]; curr!=NULL; curr=curr->next )
{
if ( curr->node == node )
{
if ( prev )
prev->next = curr->next;
else
attribs->anchor_list = curr->next;
attribs->anchor_hash[h] = curr->next;
delme = curr;
break;
}
@ -981,18 +994,19 @@ static Anchor* AddAnchor( TidyDocImpl* doc, ctmbstr name, Node *node )
{
TidyAttribImpl* attribs = &doc->attribs;
Anchor *a = NewAnchor( doc, name, node );
uint h = anchorNameHash(name);
if ( attribs->anchor_list == NULL)
attribs->anchor_list = a;
if ( attribs->anchor_hash[h] == NULL)
attribs->anchor_hash[h] = a;
else
{
Anchor *here = attribs->anchor_list;
Anchor *here = attribs->anchor_hash[h];
while (here->next)
here = here->next;
here->next = a;
}
return attribs->anchor_list;
return attribs->anchor_hash[h];
}
/* return node associated with anchor */
@ -1000,10 +1014,11 @@ static Node* GetNodeByAnchor( TidyDocImpl* doc, ctmbstr name )
{
TidyAttribImpl* attribs = &doc->attribs;
Anchor *found;
uint h = anchorNameHash(name);
tmbstr lname = TY_(tmbstrdup)(doc->allocator, name);
lname = TY_(tmbstrtolower)(lname);
for ( found = attribs->anchor_list; found != NULL; found = found->next )
for ( found = attribs->anchor_hash[h]; found != NULL; found = found->next )
{
if ( TY_(tmbstrcmp)(found->name, lname) == 0 )
break;
@ -1020,12 +1035,15 @@ void TY_(FreeAnchors)( TidyDocImpl* doc )
{
TidyAttribImpl* attribs = &doc->attribs;
Anchor* a;
while (NULL != (a = attribs->anchor_list) )
uint h;
for (h = 0; h < ANCHOR_HASH_SIZE; h++) {
while (NULL != (a = attribs->anchor_hash[h]) )
{
attribs->anchor_list = a->next;
attribs->anchor_hash[h] = a->next;
FreeAnchor(doc, a);
}
}
}
/* public method for initializing attribute dictionary */
void TY_(InitAttrs)( TidyDocImpl* doc )

View file

@ -55,10 +55,15 @@ struct _AttrHash
typedef struct _AttrHash AttrHash;
#endif
/* Number of buckets in the anchor hash table. */
enum { ANCHOR_HASH_SIZE = 1021u };
struct _TidyAttribImpl
{
/* anchor/node lookup */
Anchor* anchor_list;
Anchor* anchor_hash[ANCHOR_HASH_SIZE];
/* Declared literal attributes */
Attribute* declared_attr_list;
@ -117,7 +122,7 @@ Bool TY_(IsValidHTMLID)(ctmbstr id);
Bool TY_(IsValidXMLID)(ctmbstr id);
/* removes anchor for specific node */
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node );
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, ctmbstr name, Node *node );
/* free all anchors */
void TY_(FreeAnchors)( TidyDocImpl* doc );

View file

@ -2638,17 +2638,19 @@ void TY_(FixAnchors)(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId)
if (id && !wantId
/* make sure that Name has been emitted if requested */
&& (hadName || !wantName || NameEmitted) )
&& (hadName || !wantName || NameEmitted) ) {
if (!wantId && !wantName)
TY_(RemoveAnchorByNode)(doc, id->value, node);
TY_(RemoveAttribute)(doc, node, id);
}
if (name && !wantName
/* make sure that Id has been emitted if requested */
&& (hadId || !wantId || IdEmitted) )
&& (hadId || !wantId || IdEmitted) ) {
if (!wantId && !wantName)
TY_(RemoveAnchorByNode)(doc, name->value, node);
TY_(RemoveAttribute)(doc, node, name);
if (TY_(AttrGetById)(node, TidyAttr_NAME) == NULL &&
TY_(AttrGetById)(node, TidyAttr_ID) == NULL)
TY_(RemoveAnchorByNode)(doc, node);
}
}
if (node->content)

View file

@ -1128,7 +1128,7 @@ void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node )
if ( (attrIsID(av) || attrIsNAME(av)) &&
TY_(IsAnchorElement)(doc, node) )
{
TY_(RemoveAnchorByNode)( doc, node );
TY_(RemoveAnchorByNode)( doc, av->value, node );
}
}