Use a hash table for anchors #64

This commit is contained in:
Jim Derry 2014-11-22 19:39:06 +08:00
commit edb185a308
4 changed files with 46 additions and 21 deletions

View file

@ -941,20 +941,33 @@ static void FreeAnchor(TidyDocImpl* doc, Anchor *a)
TidyDocFree( doc, a ); TidyDocFree( doc, a );
} }
/* Compute the hash bucket index for an anchor name.
 *
 * Each character is passed through TY_(ToLower) before being mixed into
 * the running value (hash = c + 31 * hash), and the result is reduced
 * modulo ANCHOR_HASH_SIZE, so the return value is always a valid index
 * into an array of ANCHOR_HASH_SIZE buckets.
 *
 * s may be NULL: callers hand attribute values straight to this function,
 * and this function cannot assume those are always set.  A NULL name is
 * treated like the empty string and maps to bucket 0 rather than being
 * dereferenced.
 */
static uint anchorNameHash(ctmbstr s)
{
    uint hashval = 0;

    /* Guard against a missing name; reading *s on NULL would crash. */
    if ( s )
    {
        for ( ; *s != '\0'; s++ )
        {
            tmbchar c = TY_(ToLower)( *s );
            hashval = c + 31*hashval;
        }
    }
    return hashval % ANCHOR_HASH_SIZE;
}
/* removes anchor for specific node */ /* removes anchor for specific node */
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node ) void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, ctmbstr name, Node *node )
{ {
TidyAttribImpl* attribs = &doc->attribs; TidyAttribImpl* attribs = &doc->attribs;
Anchor *delme = NULL, *curr, *prev = NULL; Anchor *delme = NULL, *curr, *prev = NULL;
uint h = anchorNameHash(name);
for ( curr=attribs->anchor_list; curr!=NULL; curr=curr->next ) for ( curr=attribs->anchor_hash[h]; curr!=NULL; curr=curr->next )
{ {
if ( curr->node == node ) if ( curr->node == node )
{ {
if ( prev ) if ( prev )
prev->next = curr->next; prev->next = curr->next;
else else
attribs->anchor_list = curr->next; attribs->anchor_hash[h] = curr->next;
delme = curr; delme = curr;
break; break;
} }
@ -981,18 +994,19 @@ static Anchor* AddAnchor( TidyDocImpl* doc, ctmbstr name, Node *node )
{ {
TidyAttribImpl* attribs = &doc->attribs; TidyAttribImpl* attribs = &doc->attribs;
Anchor *a = NewAnchor( doc, name, node ); Anchor *a = NewAnchor( doc, name, node );
uint h = anchorNameHash(name);
if ( attribs->anchor_list == NULL) if ( attribs->anchor_hash[h] == NULL)
attribs->anchor_list = a; attribs->anchor_hash[h] = a;
else else
{ {
Anchor *here = attribs->anchor_list; Anchor *here = attribs->anchor_hash[h];
while (here->next) while (here->next)
here = here->next; here = here->next;
here->next = a; here->next = a;
} }
return attribs->anchor_list; return attribs->anchor_hash[h];
} }
/* return node associated with anchor */ /* return node associated with anchor */
@ -1000,10 +1014,11 @@ static Node* GetNodeByAnchor( TidyDocImpl* doc, ctmbstr name )
{ {
TidyAttribImpl* attribs = &doc->attribs; TidyAttribImpl* attribs = &doc->attribs;
Anchor *found; Anchor *found;
uint h = anchorNameHash(name);
tmbstr lname = TY_(tmbstrdup)(doc->allocator, name); tmbstr lname = TY_(tmbstrdup)(doc->allocator, name);
lname = TY_(tmbstrtolower)(lname); lname = TY_(tmbstrtolower)(lname);
for ( found = attribs->anchor_list; found != NULL; found = found->next ) for ( found = attribs->anchor_hash[h]; found != NULL; found = found->next )
{ {
if ( TY_(tmbstrcmp)(found->name, lname) == 0 ) if ( TY_(tmbstrcmp)(found->name, lname) == 0 )
break; break;
@ -1020,10 +1035,13 @@ void TY_(FreeAnchors)( TidyDocImpl* doc )
{ {
TidyAttribImpl* attribs = &doc->attribs; TidyAttribImpl* attribs = &doc->attribs;
Anchor* a; Anchor* a;
while (NULL != (a = attribs->anchor_list) ) uint h;
{ for (h = 0; h < ANCHOR_HASH_SIZE; h++) {
attribs->anchor_list = a->next; while (NULL != (a = attribs->anchor_hash[h]) )
FreeAnchor(doc, a); {
attribs->anchor_hash[h] = a->next;
FreeAnchor(doc, a);
}
} }
} }

View file

@ -55,10 +55,15 @@ struct _AttrHash
typedef struct _AttrHash AttrHash; typedef struct _AttrHash AttrHash;
#endif #endif
/* Number of buckets in the anchor hash table: it sizes the anchor_hash[]
 * array and is the modulus applied by anchorNameHash().  1021 is prime. */
enum { ANCHOR_HASH_SIZE = 1021u };
struct _TidyAttribImpl struct _TidyAttribImpl
{ {
/* anchor/node lookup */ /* anchor/node lookup */
Anchor* anchor_list; Anchor* anchor_hash[ANCHOR_HASH_SIZE];
/* Declared literal attributes */ /* Declared literal attributes */
Attribute* declared_attr_list; Attribute* declared_attr_list;
@ -117,7 +122,7 @@ Bool TY_(IsValidHTMLID)(ctmbstr id);
Bool TY_(IsValidXMLID)(ctmbstr id); Bool TY_(IsValidXMLID)(ctmbstr id);
/* removes anchor for specific node */ /* removes anchor for specific node */
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node ); void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, ctmbstr name, Node *node );
/* free all anchors */ /* free all anchors */
void TY_(FreeAnchors)( TidyDocImpl* doc ); void TY_(FreeAnchors)( TidyDocImpl* doc );

View file

@ -2638,17 +2638,19 @@ void TY_(FixAnchors)(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId)
if (id && !wantId if (id && !wantId
/* make sure that Name has been emitted if requested */ /* make sure that Name has been emitted if requested */
&& (hadName || !wantName || NameEmitted) ) && (hadName || !wantName || NameEmitted) ) {
if (!wantId && !wantName)
TY_(RemoveAnchorByNode)(doc, id->value, node);
TY_(RemoveAttribute)(doc, node, id); TY_(RemoveAttribute)(doc, node, id);
}
if (name && !wantName if (name && !wantName
/* make sure that Id has been emitted if requested */ /* make sure that Id has been emitted if requested */
&& (hadId || !wantId || IdEmitted) ) && (hadId || !wantId || IdEmitted) ) {
if (!wantId && !wantName)
TY_(RemoveAnchorByNode)(doc, name->value, node);
TY_(RemoveAttribute)(doc, node, name); TY_(RemoveAttribute)(doc, node, name);
}
if (TY_(AttrGetById)(node, TidyAttr_NAME) == NULL &&
TY_(AttrGetById)(node, TidyAttr_ID) == NULL)
TY_(RemoveAnchorByNode)(doc, node);
} }
if (node->content) if (node->content)

View file

@ -1128,7 +1128,7 @@ void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node )
if ( (attrIsID(av) || attrIsNAME(av)) && if ( (attrIsID(av) || attrIsNAME(av)) &&
TY_(IsAnchorElement)(doc, node) ) TY_(IsAnchorElement)(doc, node) )
{ {
TY_(RemoveAnchorByNode)( doc, node ); TY_(RemoveAnchorByNode)( doc, av->value, node );
} }
} }