/* $NetBSD: hash.c,v 1.74 2023/12/19 19:33:39 rillig Exp $ */ /* * Copyright (c) 1988, 1989, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Adam de Boor. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1988, 1989 by Adam de Boor * Copyright (c) 1989 by Berkeley Softworks * All rights reserved. * * This code is derived from software contributed to Berkeley by * Adam de Boor. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Hash tables with string keys and pointer values. */ #include "make.h" /* "@(#)hash.c 8.1 (Berkeley) 6/6/93" */ MAKE_RCSID("$NetBSD: hash.c,v 1.74 2023/12/19 19:33:39 rillig Exp $"); /* * The ratio of # entries to # buckets at which we rebuild the table to * make it larger. */ #define rebuildLimit 3 /* This hash function matches Gosling's Emacs and java.lang.String. */ static unsigned int Hash_String(const char *key, const char **out_keyEnd) { unsigned int h; const char *p; h = 0; for (p = key; *p != '\0'; p++) h = 31 * h + (unsigned char)*p; *out_keyEnd = p; return h; } /* This hash function matches Gosling's Emacs and java.lang.String. */ unsigned int Hash_Substring(Substring key) { unsigned int h; const char *p; h = 0; for (p = key.start; p != key.end; p++) h = 31 * h + (unsigned char)*p; return h; } static HashEntry * HashTable_Find(HashTable *t, Substring key, unsigned int h) { HashEntry *he; unsigned int chainlen = 0; size_t keyLen = Substring_Length(key); #ifdef DEBUG_HASH_LOOKUP DEBUG4(HASH, "HashTable_Find: %p h=%08x key=%.*s\n", t, h, (int)keyLen, key.start); #endif for (he = t->buckets[h & t->bucketsMask]; he != NULL; he = he->next) { chainlen++; if (he->hash == h && strncmp(he->key, key.start, keyLen) == 0 && he->key[keyLen] == '\0') break; } if (chainlen > t->maxchain) t->maxchain = chainlen; return he; } /* Set up the hash table. */ void HashTable_Init(HashTable *t) { unsigned int n = 16, i; HashEntry **buckets = bmake_malloc(sizeof *buckets * n); for (i = 0; i < n; i++) buckets[i] = NULL; t->buckets = buckets; t->bucketsSize = n; t->numEntries = 0; t->bucketsMask = n - 1; t->maxchain = 0; } /* * Remove everything from the hash table and free up the memory for the keys * of the hash table, but not for the values associated to these keys. */ void HashTable_Done(HashTable *t) { HashEntry **buckets = t->buckets; size_t i, n = t->bucketsSize; for (i = 0; i < n; i++) { HashEntry *he = buckets[i]; while (he != NULL) { HashEntry *next = he->next; free(he); he = next; } } free(t->buckets); #ifdef CLEANUP t->buckets = NULL; #endif } /* Find the entry corresponding to the key, or return NULL. */ HashEntry * HashTable_FindEntry(HashTable *t, const char *key) { const char *keyEnd; unsigned int h = Hash_String(key, &keyEnd); return HashTable_Find(t, Substring_Init(key, keyEnd), h); } /* Find the value corresponding to the key, or return NULL. */ void * HashTable_FindValue(HashTable *t, const char *key) { HashEntry *he = HashTable_FindEntry(t, key); return he != NULL ? he->value : NULL; } /* * Find the value corresponding to the key and the precomputed hash, * or return NULL. */ void * HashTable_FindValueBySubstringHash(HashTable *t, Substring key, unsigned int h) { HashEntry *he = HashTable_Find(t, key, h); return he != NULL ? he->value : NULL; } /* * Make the hash table larger. Any bucket numbers from the old table become * invalid; the hash values stay valid though. */ static void HashTable_Enlarge(HashTable *t) { unsigned int oldSize = t->bucketsSize; HashEntry **oldBuckets = t->buckets; unsigned int newSize = 2 * oldSize; unsigned int newMask = newSize - 1; HashEntry **newBuckets = bmake_malloc(sizeof *newBuckets * newSize); size_t i; for (i = 0; i < newSize; i++) newBuckets[i] = NULL; for (i = 0; i < oldSize; i++) { HashEntry *he = oldBuckets[i]; while (he != NULL) { HashEntry *next = he->next; he->next = newBuckets[he->hash & newMask]; newBuckets[he->hash & newMask] = he; he = next; } } free(oldBuckets); t->bucketsSize = newSize; t->bucketsMask = newMask; t->buckets = newBuckets; DEBUG4(HASH, "HashTable_Enlarge: %p size=%d entries=%d maxchain=%d\n", (void *)t, t->bucketsSize, t->numEntries, t->maxchain); t->maxchain = 0; } /* * Find or create an entry corresponding to the key. * Return in out_isNew whether a new entry has been created. */ HashEntry * HashTable_CreateEntry(HashTable *t, const char *key, bool *out_isNew) { const char *keyEnd; unsigned int h = Hash_String(key, &keyEnd); HashEntry *he = HashTable_Find(t, Substring_Init(key, keyEnd), h); if (he != NULL) { if (out_isNew != NULL) *out_isNew = false; return he; } if (t->numEntries >= rebuildLimit * t->bucketsSize) HashTable_Enlarge(t); he = bmake_malloc(sizeof *he + (size_t)(keyEnd - key)); he->value = NULL; he->hash = h; memcpy(he->key, key, (size_t)(keyEnd - key) + 1); he->next = t->buckets[h & t->bucketsMask]; t->buckets[h & t->bucketsMask] = he; t->numEntries++; if (out_isNew != NULL) *out_isNew = true; return he; } void HashTable_Set(HashTable *t, const char *key, void *value) { HashEntry *he = HashTable_CreateEntry(t, key, NULL); HashEntry_Set(he, value); } /* Delete the entry from the table, don't free the value of the entry. */ void HashTable_DeleteEntry(HashTable *t, HashEntry *he) { HashEntry **ref = &t->buckets[he->hash & t->bucketsMask]; HashEntry *p; for (; (p = *ref) != NULL; ref = &p->next) { if (p == he) { *ref = p->next; free(p); t->numEntries--; return; } } abort(); } /* * Return the next entry in the hash table, or NULL if the end of the table * is reached. */ HashEntry * HashIter_Next(HashIter *hi) { HashTable *t = hi->table; HashEntry *he = hi->entry; HashEntry **buckets = t->buckets; unsigned int bucketsSize = t->bucketsSize; if (he != NULL) he = he->next; /* skip the most recently returned entry */ while (he == NULL) { /* find the next nonempty chain */ if (hi->nextBucket >= bucketsSize) return NULL; he = buckets[hi->nextBucket++]; } hi->entry = he; return he; } void HashTable_DebugStats(HashTable *t, const char *name) { DEBUG4(HASH, "HashTable %s: size=%u numEntries=%u maxchain=%u\n", name, t->bucketsSize, t->numEntries, t->maxchain); }