/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define C_LUCY_HASH #define C_LUCY_HASHTOMBSTONE #define LUCY_USE_SHORT_NAMES #define CHY_USE_SHORT_NAMES #include #include #include "Lucy/Object/VTable.h" #include "Lucy/Object/Hash.h" #include "Lucy/Object/CharBuf.h" #include "Lucy/Object/Err.h" #include "Lucy/Object/VArray.h" #include "Lucy/Store/InStream.h" #include "Lucy/Store/OutStream.h" #include "Lucy/Util/Freezer.h" #include "Lucy/Util/Memory.h" static HashTombStone TOMBSTONE = { HASHTOMBSTONE, {1} }; #define HashEntry lucy_HashEntry typedef struct HashEntry { Obj *key; Obj *value; int32_t hash_sum; } HashEntry; // Reset the iterator. Hash_Iterate must be called to restart iteration. static INLINE void SI_kill_iter(Hash *self); // Return the entry associated with the key, if any. static INLINE HashEntry* SI_fetch_entry(Hash *self, const Obj *key, int32_t hash_sum); // Double the number of buckets and redistribute all entries. static INLINE HashEntry* SI_rebuild_hash(Hash *self); Hash* Hash_new(uint32_t capacity) { Hash *self = (Hash*)VTable_Make_Obj(HASH); return Hash_init(self, capacity); } Hash* Hash_init(Hash *self, uint32_t capacity) { // Allocate enough space to hold the requested number of elements without // triggering a rebuild. uint32_t requested_capacity = capacity < I32_MAX ? capacity : I32_MAX; uint32_t threshold; capacity = 16; while (1) { threshold = (capacity / 3) * 2; if (threshold > requested_capacity) { break; } capacity *= 2; } // Init. self->size = 0; self->iter_tick = -1; // Derive. self->capacity = capacity; self->entries = (HashEntry*)CALLOCATE(capacity, sizeof(HashEntry)); self->threshold = threshold; return self; } void Hash_destroy(Hash *self) { if (self->entries) { Hash_Clear(self); FREEMEM(self->entries); } SUPER_DESTROY(self, HASH); } Hash* Hash_dump(Hash *self) { Hash *dump = Hash_new(self->size); Obj *key; Obj *value; Hash_Iterate(self); while (Hash_Next(self, &key, &value)) { // Since JSON only supports text hash keys, Dump() can only support // text hash keys. CERTIFY(key, CHARBUF); Hash_Store(dump, key, Obj_Dump(value)); } return dump; } Obj* Hash_load(Hash *self, Obj *dump) { Hash *source = (Hash*)CERTIFY(dump, HASH); CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6); UNUSED_VAR(self); // Assume that the presence of the "_class" key paired with a valid class // name indicates the output of a Dump rather than an ordinary Hash. */ if (class_name && CB_Is_A(class_name, CHARBUF)) { VTable *vtable = VTable_fetch_vtable(class_name); if (!vtable) { CharBuf *parent_class = VTable_find_parent_class(class_name); if (parent_class) { VTable *parent = VTable_singleton(parent_class, NULL); vtable = VTable_singleton(class_name, parent); DECREF(parent_class); } else { // TODO: Fix Hash_Load() so that it works with ordinary hash // keys named "_class". THROW(ERR, "Can't find class '%o'", class_name); } } // Dispatch to an alternate Load() method. if (vtable) { Obj_load_t load = (Obj_load_t)METHOD(vtable, Obj, Load); if (load == Obj_load) { THROW(ERR, "Abstract method Load() not defined for %o", VTable_Get_Name(vtable)); } else if (load != (Obj_load_t)Hash_load) { // stop inf loop return load(NULL, dump); } } } // It's an ordinary Hash. Hash *loaded = Hash_new(source->size); Obj *key; Obj *value; Hash_Iterate(source); while (Hash_Next(source, &key, &value)) { Hash_Store(loaded, key, Obj_Load(value, value)); } return (Obj*)loaded; } void Hash_serialize(Hash *self, OutStream *outstream) { Obj *key; Obj *val; uint32_t charbuf_count = 0; OutStream_Write_C32(outstream, self->size); // Write CharBuf keys first. CharBuf keys are the common case; grouping // them together is a form of run-length-encoding and saves space, since // we omit the per-key class name. Hash_Iterate(self); while (Hash_Next(self, &key, &val)) { if (Obj_Is_A(key, CHARBUF)) { charbuf_count++; } } OutStream_Write_C32(outstream, charbuf_count); Hash_Iterate(self); while (Hash_Next(self, &key, &val)) { if (Obj_Is_A(key, CHARBUF)) { Obj_Serialize(key, outstream); FREEZE(val, outstream); } } // Punt on the classes of the remaining keys. Hash_Iterate(self); while (Hash_Next(self, &key, &val)) { if (!Obj_Is_A(key, CHARBUF)) { FREEZE(key, outstream); FREEZE(val, outstream); } } } Hash* Hash_deserialize(Hash *self, InStream *instream) { uint32_t size = InStream_Read_C32(instream); uint32_t num_charbufs = InStream_Read_C32(instream); uint32_t num_other = size - num_charbufs; CharBuf *key = num_charbufs ? CB_new(0) : NULL; if (self) { Hash_init(self, size); } else { self = Hash_new(size); } // Read key-value pairs with CharBuf keys. while (num_charbufs--) { uint32_t len = InStream_Read_C32(instream); char *key_buf = CB_Grow(key, len); InStream_Read_Bytes(instream, key_buf, len); key_buf[len] = '\0'; CB_Set_Size(key, len); Hash_Store(self, (Obj*)key, THAW(instream)); } DECREF(key); // Read remaining key/value pairs. while (num_other--) { Obj *k = THAW(instream); Hash_Store(self, k, THAW(instream)); DECREF(k); } return self; } void Hash_clear(Hash *self) { HashEntry *entry = (HashEntry*)self->entries; HashEntry *const limit = entry + self->capacity; // Iterate through all entries. for (; entry < limit; entry++) { if (!entry->key) { continue; } DECREF(entry->key); DECREF(entry->value); entry->key = NULL; entry->value = NULL; entry->hash_sum = 0; } self->size = 0; } void Hash_do_store(Hash *self, Obj *key, Obj *value, int32_t hash_sum, bool_t use_this_key) { HashEntry *entries = self->size >= self->threshold ? SI_rebuild_hash(self) : (HashEntry*)self->entries; uint32_t tick = hash_sum; const uint32_t mask = self->capacity - 1; while (1) { tick &= mask; HashEntry *entry = entries + tick; if (entry->key == (Obj*)&TOMBSTONE || !entry->key) { if (entry->key == (Obj*)&TOMBSTONE) { // Take note of diminished tombstone clutter. self->threshold++; } entry->key = use_this_key ? key : Hash_Make_Key(self, key, hash_sum); entry->value = value; entry->hash_sum = hash_sum; self->size++; break; } else if (entry->hash_sum == hash_sum && Obj_Equals(key, entry->key) ) { DECREF(entry->value); entry->value = value; break; } tick++; // linear scan } } void Hash_store(Hash *self, Obj *key, Obj *value) { Hash_do_store(self, key, value, Obj_Hash_Sum(key), false); } void Hash_store_str(Hash *self, const char *key, size_t key_len, Obj *value) { ZombieCharBuf *key_buf = ZCB_WRAP_STR((char*)key, key_len); Hash_do_store(self, (Obj*)key_buf, value, ZCB_Hash_Sum(key_buf), false); } Obj* Hash_make_key(Hash *self, Obj *key, int32_t hash_sum) { UNUSED_VAR(self); UNUSED_VAR(hash_sum); return Obj_Clone(key); } Obj* Hash_fetch_str(Hash *self, const char *key, size_t key_len) { ZombieCharBuf *key_buf = ZCB_WRAP_STR(key, key_len); return Hash_fetch(self, (Obj*)key_buf); } static INLINE HashEntry* SI_fetch_entry(Hash *self, const Obj *key, int32_t hash_sum) { uint32_t tick = hash_sum; HashEntry *const entries = (HashEntry*)self->entries; HashEntry *entry; while (1) { tick &= self->capacity - 1; entry = entries + tick; if (!entry->key) { // Failed to find the key, so return NULL. return NULL; } else if (entry->hash_sum == hash_sum && Obj_Equals(key, entry->key) ) { return entry; } tick++; } } Obj* Hash_fetch(Hash *self, const Obj *key) { HashEntry *entry = SI_fetch_entry(self, key, Obj_Hash_Sum(key)); return entry ? entry->value : NULL; } Obj* Hash_delete(Hash *self, const Obj *key) { HashEntry *entry = SI_fetch_entry(self, key, Obj_Hash_Sum(key)); if (entry) { Obj *value = entry->value; DECREF(entry->key); entry->key = (Obj*)&TOMBSTONE; entry->value = NULL; entry->hash_sum = 0; self->size--; self->threshold--; // limit number of tombstones return value; } else { return NULL; } } Obj* Hash_delete_str(Hash *self, const char *key, size_t key_len) { ZombieCharBuf *key_buf = ZCB_WRAP_STR(key, key_len); return Hash_delete(self, (Obj*)key_buf); } uint32_t Hash_iterate(Hash *self) { SI_kill_iter(self); return self->size; } static INLINE void SI_kill_iter(Hash *self) { self->iter_tick = -1; } bool_t Hash_next(Hash *self, Obj **key, Obj **value) { while (1) { if (++self->iter_tick >= (int32_t)self->capacity) { // Bail since we've completed the iteration. --self->iter_tick; *key = NULL; *value = NULL; return false; } else { HashEntry *const entry = (HashEntry*)self->entries + self->iter_tick; if (entry->key && entry->key != (Obj*)&TOMBSTONE) { // Success! *key = entry->key; *value = entry->value; return true; } } } } Obj* Hash_find_key(Hash *self, const Obj *key, int32_t hash_sum) { HashEntry *entry = SI_fetch_entry(self, key, hash_sum); return entry ? entry->key : NULL; } VArray* Hash_keys(Hash *self) { Obj *key; Obj *val; VArray *keys = VA_new(self->size); Hash_Iterate(self); while (Hash_Next(self, &key, &val)) { VA_push(keys, INCREF(key)); } return keys; } VArray* Hash_values(Hash *self) { Obj *key; Obj *val; VArray *values = VA_new(self->size); Hash_Iterate(self); while (Hash_Next(self, &key, &val)) { VA_push(values, INCREF(val)); } return values; } bool_t Hash_equals(Hash *self, Obj *other) { Hash *twin = (Hash*)other; Obj *key; Obj *val; if (twin == self) { return true; } if (!Obj_Is_A(other, HASH)) { return false; } if (self->size != twin->size) { return false; } Hash_Iterate(self); while (Hash_Next(self, &key, &val)) { Obj *other_val = Hash_Fetch(twin, key); if (!other_val || !Obj_Equals(other_val, val)) { return false; } } return true; } uint32_t Hash_get_capacity(Hash *self) { return self->capacity; } uint32_t Hash_get_size(Hash *self) { return self->size; } static INLINE HashEntry* SI_rebuild_hash(Hash *self) { HashEntry *old_entries = (HashEntry*)self->entries; HashEntry *entry = old_entries; HashEntry *limit = old_entries + self->capacity; SI_kill_iter(self); self->capacity *= 2; self->threshold = (self->capacity / 3) * 2; self->entries = (HashEntry*)CALLOCATE(self->capacity, sizeof(HashEntry)); self->size = 0; for (; entry < limit; entry++) { if (!entry->key || entry->key == (Obj*)&TOMBSTONE) { continue; } Hash_do_store(self, entry->key, entry->value, entry->hash_sum, true); } FREEMEM(old_entries); return (HashEntry*)self->entries; } /***************************************************************************/ uint32_t HashTombStone_get_refcount(HashTombStone* self) { CHY_UNUSED_VAR(self); return 1; } HashTombStone* HashTombStone_inc_refcount(HashTombStone* self) { return self; } uint32_t HashTombStone_dec_refcount(HashTombStone* self) { UNUSED_VAR(self); return 1; }