/* hash.c */ #include "common.h" #include "buffer.h" #include "hash.h" #include #include #include /* Special value that can be used to represent NULL in a hash to make it * distinct from the NULL return when a key isn't present. */ void *hash_NULL = "[hash null]"; /* #define INSTRUMENT */ #ifdef INSTRUMENT #include static struct { long n_new; long n_delete; long n_delete_key; long n_put; long n_get; long n_get_key; long n_hash; long n_rehash; long n_find; } _stats; static void _inst_dump( void ) { fprintf( stderr, " hash_new(): %10ld\n", _stats.n_new ); fprintf( stderr, " hash_delete(): %10ld\n", _stats.n_delete ); fprintf( stderr, "hash_delete_key(): %10ld\n", _stats.n_delete_key ); fprintf( stderr, " hash_put(): %10ld\n", _stats.n_put ); fprintf( stderr, " hash_get(): %10ld\n", _stats.n_get ); fprintf( stderr, " hash_get_key(): %10ld\n", _stats.n_get_key ); fprintf( stderr, " _hash(): %10ld\n", _stats.n_hash ); fprintf( stderr, " _rehash(): %10ld\n", _stats.n_rehash ); fprintf( stderr, " _find(): %10ld\n", _stats.n_find ); } static void _inst_init( void ) { static int init_done = 0; if ( !init_done ) { atexit( _inst_dump ); init_done = 1; } } static int _init_done = 0; #define INST_A(f, c) do { _stats.n_ ## f += (c); } while (0) #define INST_I(f) INST_A(f, 1) #define INST_INIT() _inst_init() #else #define INST_A(f, c) (void) 0 #define INST_I(f) (void) 0 #define INST_INIT() (void) 0 #endif #define KEY(sl) ((const void *) ((sl) + 1)) #define EQKEY(sl, key, key_len) \ ((sl)->key_len == key_len && memcmp(KEY(sl), key, key_len) == 0) int hash_new( long capacity, hash ** hh ) { hash *h; int err; long s; INST_INIT( ); INST_I( new ); if ( capacity <= 0 ) { capacity = 1; } if ( h = malloc( sizeof( hash ) ), !h ) { return ERR_Not_Enough_Memory; } if ( h->slot = malloc( sizeof( hash_slot * ) * capacity ), !h->slot ) { free( h ); return ERR_Not_Enough_Memory; } h->cap = capacity; h->state = 0; h->size = 0; h->deleted = 0; h->cbd = NULL; h->cb_add = NULL; h->cb_del = NULL; h->cb_upd = NULL; for ( s = 0; s < h->cap; s++ ) { h->slot[s] = -1; } if ( err = buffer_init( &h->buf, 0, 256 ), ERR_None == err ) { *hh = h; } return err; } int hash_set_callbacks( hash * h, void *cbd, int ( *cb_add ) ( hash * h, void *d, void **v ), int ( *cb_del ) ( hash * h, void *d, void *v ), int ( *cb_upd ) ( hash * h, void *d, void *ov, void **nv ) ) { h->cbd = cbd; h->cb_add = cb_add; h->cb_del = cb_del; h->cb_upd = cb_upd; return ERR_None; } int hash_delete( hash * h ) { INST_I( delete ); if ( !h ) { return ERR_None; } /* If the cb_del callback is defined we need to call * it for each of the values the hash contains. */ if ( h->cb_del ) { long b, s; int err; for ( b = 0; b < h->cap; b++ ) { for ( s = h->slot[b]; s != -1; ) { hash_slot *sl = ( hash_slot * ) ( ( char * )h->buf.buf + s ); if ( err = h->cb_del( h, h->cbd, sl->v ), ERR_None != err ) { return err; } s = sl->link; } } } buffer_delete( &h->buf ); free( h->slot ); free( h ); return ERR_None; } /* Compute the hash for a key */ static unsigned long _hash( const void *k, size_t kl ) { unsigned long h = 0; const unsigned char *kp = k; INST_I( hash ); while ( kl-- ) { h = 31 * h + ( *kp++ ); } return h; } static long _find( hash * h, const void *key, size_t key_len, unsigned long hc ) { long s = h->slot[hc % h->cap]; INST_I( find ); while ( -1 != s ) { hash_slot *sl = ( hash_slot * ) ( ( char * )h->buf.buf + s ); if ( EQKEY( sl, key, key_len ) ) { return s; } s = sl->link; } return s; } /* Change the bucket count of a hash to better suit the number of keys it * contains. At the moment this simplemindedly copies the old hash an item at a * time into a new hash and then pillages the fields of the new hash to * repopulate the old one. * * A smarter strategy might be to merge all the bucket chains into a single * chain - like a hash with just one bucket and then redistribute them into a * set of new buckets. That would avoid copying all the keys but wouldn't * garbage collect deleted keys. * * Note that it's theoretically possible that the new hash we create here could * decide to recursively rehash itself. The only thing stopping that is the * fact that the new hash has enough buckets for all the keys from the outset. * Bear that in mind if you decide to tinker with the code. A recursive rehash * shouldn't break anything but it's clearly a bit daft. */ static int _rehash( hash * h ) { const void *key; size_t key_len; hash *nh; hash_iter i; int err; /* Might as well have plenty of buckets: they're cheap */ long ncap = NMAX( 10, h->size * 2 ); INST_I( rehash ); /* All we do is make a new hash and copy the old one into it... */ if ( err = hash_new( ncap, &nh ), ERR_None != err ) { return err; } /* Iterate through the keys copying entries one at a time. This has the * happy side effect of clearing out the garbage left by any deleted keys. * Any callbacks that are installed for the original hash won't be in * effect on the new hash so there's no need to worry about any side * effects they might have. Once the new hash data is moved back into the * original hash any callbacks will automatically take effect again. */ key = hash_get_first_key( h, &i, &key_len ); while ( key ) { if ( err = hash_put( nh, key, key_len, hash_get( h, key, key_len ) ), ERR_None != err ) { hash_delete( nh ); return err; } key = hash_get_next_key( h, &i, &key_len ); } /* Delete the old contents of the hash and... */ buffer_delete( &h->buf ); free( h->slot ); /* ...move various bits of the new hash into it. */ h->buf = nh->buf; h->slot = nh->slot; h->cap = nh->cap; h->size = nh->size; h->deleted = 0; /* Tweak the buffer's growby field */ h->buf.growby = NMAX( h->buf.size / 4, 256 ); /* This definitely represents a change in the hash's state... */ h->state++; /* Destroy the (now empty) header of the new hash. */ free( nh ); return ERR_None; } int hash_delete_key( hash * h, const void *key, size_t key_len ) { unsigned int hc = _hash( key, key_len ); long *sp = &h->slot[hc % h->cap]; long s = *sp; INST_I( delete_key ); while ( -1 != s ) { hash_slot *sl = ( hash_slot * ) ( ( char * )h->buf.buf + s ); if ( EQKEY( sl, key, key_len ) ) { if ( h->cb_del ) { int err; if ( err = h->cb_del( h, h->cbd, sl->v ), err != ERR_None ) { return err; } } *sp = sl->link; h->size--; h->deleted++; if ( h->deleted > h->size ) { return _rehash( h ); } return ERR_None; } sp = &sl->link; s = *sp; } /* Note that we haven't incremented state because a deletion can't break an * iterator. Well, that's the idea anyway. * TODO: But deletion /can/ cause a rehash - and that'd definitely break * an iterator... Bugger. */ return ERR_None; } /* Add/replace a value in the hash. The key is copied into the hash structure * but the value is treated as an opaque pointer so the life of the pointed-to * object must match the life of the hash. */ int hash_put( hash * h, const void *key, size_t key_len, void *val ) { unsigned int hc = _hash( key, key_len ); hash_slot *sl; int err; long s; INST_I( put ); if ( s = _find( h, key, key_len, hc ), -1 == s ) { long sn = hc % h->cap; /* Create a new entry which includes the key inline after the * hash_slot structure. */ size_t sz = sizeof( hash_slot ) + PAD( key_len ); /* Grow the buffer. This may cause it to move altogether which is why * we don't stash self referential addresses in the hash. */ if ( err = buffer_ensure_free( &h->buf, sz ), ERR_None != err ) { return err; } /* Call any registered callback /before/ we modify the structure so * that if it fails the only possible change to the hash is that the * buffer will have been expanded. */ if ( h->cb_add ) { if ( err = h->cb_add( h, h->cbd, &val ), ERR_None != err ) { return err; } } s = h->buf.used; sl = ( hash_slot * ) ( ( char * )h->buf.buf + s ); h->buf.used += sz; sl->v = val; memcpy( ( void * )KEY( sl ), key, key_len ); sl->key_len = key_len; sl->link = h->slot[sn]; h->slot[sn] = s; h->state++; h->size++; if ( ( int )h->size > h->cap * 5 ) { return _rehash( h ); } } else { /* Replace an existing entry. */ sl = ( hash_slot * ) ( ( char * )h->buf.buf + s ); /* If the value is actually changing inform any callbacks */ if ( sl->v != val ) { if ( h->cb_upd ) { if ( err = h->cb_upd( h, h->cbd, sl->v, &val ), ERR_None != err ) { /* NULL out the pointer on the assumption that the update function * at least managed to free the old value. If this turns out to be * untrue we'll have leaked a little. */ sl->v = NULL; return err; } } else { if ( h->cb_del ) { if ( err = h->cb_del( h, h->cbd, sl->v ), ERR_None != err ) { sl->v = NULL; return err; } } if ( h->cb_add ) { if ( err = h->cb_add( h, h->cbd, &val ), ERR_None != err ) { return err; } } } } sl->v = val; } return ERR_None; } void *hash_get( hash * h, const void *key, size_t key_len ) { long s = _find( h, key, key_len, _hash( key, key_len ) ); INST_I( get ); if ( s == -1 ) { return NULL; } else { hash_slot *sl = ( hash_slot * ) ( ( char * )h->buf.buf + s ); return sl->v; } } size_t hash_size( hash * h ) { return h->size; } const void *hash_get_first_key( hash * h, hash_iter * i, size_t * key_len ) { i->state = h->state; i->bucket = 0; i->sl = h->slot[i->bucket]; return hash_get_next_key( h, i, key_len ); } const void *hash_get_next_key( hash * h, hash_iter * i, size_t * key_len ) { const void *key; hash_slot *sl; /* Assertion fails if the hash has been updated since this iterator was * initialised. */ if ( i->state != h->state ) { fprintf( stderr, "Hash modified during iteration\n" ); exit( 1 ); } while ( i->sl == -1 ) { if ( ++i->bucket >= h->cap ) { return NULL; } i->sl = h->slot[i->bucket]; } sl = ( hash_slot * ) ( ( char * )h->buf.buf + i->sl ); key = KEY( sl ); i->sl = sl->link; if ( key_len ) { *key_len = sl->key_len; } return key; }