- replaced the hash table size heuristic with a table of hard-coded table sizes
  (taken from the GNU ISO C++ Library), which are all prime
- replaced the string hash function by one that works slightly better & faster
- changed various types to unsigned
- added code to help debug the number of hash collisions (off by default)

svn-id: r21431
This commit is contained in:
Max Horn 2006-03-24 16:53:32 +00:00
parent 7307c4cb3d
commit 9dc07c11cd
2 changed files with 92 additions and 61 deletions

View file

@ -60,57 +60,68 @@
namespace Common { namespace Common {
// int: // int:
int hashit(int x, int hashsize) { uint hashit(int x, uint hashsize) {
return x % hashsize; return x % hashsize;
} }
int data_eq(int x, int y) { bool data_eq(int x, int y) {
return x == y; return x == y;
} }
#if 0 #if 0
// double: // double:
int hashit(double d, int hashsize) { uint hashit(double d, uint hashsize) {
int hash, dex; TODO
byte *p = (byte *)&d;
hash = 0;
for (dex = 0; dex < sizeof(double); dex++)
hash = ((hash << 8) + p[dex]) % hashsize;
return hash;
} }
#endif #endif
int data_eq(double d1, double d2) { bool data_eq(double d1, double d2) {
return (d1 == d2); return (d1 == d2);
} }
// const char *: // const char *:
int hashit(const char *str, int hashsize) { uint hashit(const char *str, uint hashsize) {
const byte *p = (const byte *)str; const byte *p = (const byte *)str;
int hash, dex; uint hash;
char c;
// my31 algo
hash = 0; hash = 0;
while ((c = *p++))
hash = (hash * 31 + c);
for (dex = 0; p[dex] != 0; dex++) return hash % hashsize;
hash = ((hash << 8) + p[dex]) % hashsize;
return hash;
} }
int data_eq(const char *str1, const char *str2) { bool data_eq(const char *str1, const char *str2) {
return !strcmp(str1, str2); return !strcmp(str1, str2);
} }
// String: // String:
int hashit(const Common::String &str, int hashsize) { uint hashit(const Common::String &str, uint hashsize) {
return hashit(str.c_str(), hashsize); return hashit(str.c_str(), hashsize);
} }
int data_eq(const Common::String &str1, const String &str2) { bool data_eq(const Common::String &str1, const String &str2) {
return (str1 == str2); return (str1 == str2);
} }
// The following table is taken from the GNU ISO C++ Library's hashtable.h file.
// All entries are prime and the table is sorted in ascending order;
// nextTableSize() relies on that ordering.
static const uint primes[] = {
	53ul,         97ul,         193ul,        389ul,        769ul,
	1543ul,       3079ul,       6151ul,       12289ul,      24593ul,
	49157ul,      98317ul,      196613ul,     393241ul,     786433ul,
	1572869ul,    3145739ul,    6291469ul,    12582917ul,   25165843ul,
	50331653ul,   100663319ul,  201326611ul,  402653189ul,  805306457ul,
	1610612741ul, 3221225473ul, 4294967291ul
};

// Return the smallest entry of the primes table that is strictly greater
// than x. If x is already greater than or equal to the largest entry, no
// bigger size is available; in that case the largest entry is returned
// rather than reading past the end of the table.
uint nextTableSize(uint x) {
	const uint numPrimes = sizeof(primes) / sizeof(primes[0]);
	uint i = 0;

	// Never advance beyond the last entry, so primes[i] is always valid.
	while (i + 1 < numPrimes && x >= primes[i])
		i++;

	return primes[i];
}
} // End of namespace Common } // End of namespace Common

View file

@ -64,8 +64,6 @@
namespace Common { namespace Common {
#define INIT_SIZE 11
typedef Common::String String; typedef Common::String String;
// If aa is an AssocArray<Key,Val>, then space is allocated each // If aa is an AssocArray<Key,Val>, then space is allocated each
@ -80,14 +78,25 @@ typedef Common::String String;
// be considered equal. Also, we assume that "=" works // be considered equal. Also, we assume that "=" works
// on Val's for assignment. // on Val's for assignment.
int hashit(int x, int hashsize); uint hashit(int x, uint hashsize);
int data_eq(int x, int y); bool data_eq(int x, int y);
int hashit(double x, int hashsize); uint hashit(double x, uint hashsize);
int data_eq(double x, double y); bool data_eq(double x, double y);
int hashit(const char *str, int hashsize); uint hashit(const char *str, uint hashsize);
int data_eq(const char *str1, const char *str2); bool data_eq(const char *str1, const char *str2);
int hashit(const String &str, int hashsize); uint hashit(const String &str, uint hashsize);
int data_eq(const String &str1, const String &str2); bool data_eq(const String &str1, const String &str2);
// The table sizes ideally are primes. We use a helper function to find
// suitable table sizes: given the current size x, it returns the next
// larger entry from a fixed, ascending table of primes. Passing 0 yields
// the smallest (initial) table size.
uint nextTableSize(uint x);
// Enable the following #define if you want to check how many collisions the
// code produces (many collisions indicate either a bad hash function, or a
// hash table that is too small).
//#define DEBUG_HASH_COLLISIONS
template <class Key, class Val> template <class Key, class Val>
class AssocArray { class AssocArray {
@ -101,7 +110,11 @@ private:
}; };
aa_ref_t **_arr; // hashtable of size arrsize. aa_ref_t **_arr; // hashtable of size arrsize.
int _arrsize, _nele; uint _arrsize, _nele;
#ifdef DEBUG_HASH_COLLISIONS
mutable int _collisions;
#endif
int lookup(const Key &key) const; int lookup(const Key &key) const;
void expand_array(void); void expand_array(void);
@ -137,9 +150,7 @@ public:
template <class Key, class Val> template <class Key, class Val>
int AssocArray<Key, Val>::lookup(const Key &key) const { int AssocArray<Key, Val>::lookup(const Key &key) const {
int ctr; uint ctr = hashit(key, _arrsize);
ctr = hashit(key, _arrsize);
while (_arr[ctr] != NULL && !data_eq(_arr[ctr]->key, key)) { while (_arr[ctr] != NULL && !data_eq(_arr[ctr]->key, key)) {
ctr++; ctr++;
@ -148,19 +159,23 @@ int AssocArray<Key, Val>::lookup(const Key &key) const {
ctr = 0; ctr = 0;
} }
#ifdef DEBUG_HASH_COLLISIONS
fprintf(stderr, "collisions = %d in AssocArray %p\n", _collisions, (const void *)this);
#endif
return ctr; return ctr;
} }
template <class Key, class Val> template <class Key, class Val>
bool AssocArray<Key, Val>::contains(const Key &key) const { bool AssocArray<Key, Val>::contains(const Key &key) const {
int ctr = lookup(key); uint ctr = lookup(key);
return (_arr[ctr] != NULL); return (_arr[ctr] != NULL);
} }
template <class Key, class Val> template <class Key, class Val>
Key *AssocArray<Key, Val>::new_all_keys(void) const { Key *AssocArray<Key, Val>::new_all_keys(void) const {
Key *all_keys; Key *all_keys;
int ctr, dex; uint ctr, dex;
if (_nele == 0) if (_nele == 0)
return NULL; return NULL;
@ -186,7 +201,7 @@ Key *AssocArray<Key, Val>::new_all_keys(void) const {
template <class Key, class Val> template <class Key, class Val>
Val *AssocArray<Key, Val>::new_all_values(void) const { Val *AssocArray<Key, Val>::new_all_values(void) const {
Val *all_values; Val *all_values;
int ctr, dex; uint ctr, dex;
if (_nele == 0) if (_nele == 0)
return NULL; return NULL;
@ -212,21 +227,24 @@ Val *AssocArray<Key, Val>::new_all_values(void) const {
template <class Key, class Val> template <class Key, class Val>
AssocArray<Key, Val>::AssocArray() { AssocArray<Key, Val>::AssocArray() {
int ctr; uint ctr;
_arr = new aa_ref_t *[INIT_SIZE]; _arrsize = nextTableSize(0);
_arr = new aa_ref_t *[_arrsize];
assert(_arr != NULL); assert(_arr != NULL);
for (ctr = 0; ctr < _arrsize; ctr++)
for (ctr = 0; ctr < INIT_SIZE; ctr++)
_arr[ctr] = NULL; _arr[ctr] = NULL;
_arrsize = INIT_SIZE;
_nele = 0; _nele = 0;
#ifdef DEBUG_HASH_COLLISIONS
_collisions = 0;
#endif
} }
template <class Key, class Val> template <class Key, class Val>
AssocArray<Key, Val>::~AssocArray() { AssocArray<Key, Val>::~AssocArray() {
int ctr; uint ctr;
for (ctr = 0; ctr < _arrsize; ctr++) for (ctr = 0; ctr < _arrsize; ctr++)
if (_arr[ctr] != NULL) if (_arr[ctr] != NULL)
@ -237,20 +255,20 @@ AssocArray<Key, Val>::~AssocArray() {
template <class Key, class Val> template <class Key, class Val>
void AssocArray<Key, Val>::clear(bool shrinkArray) { void AssocArray<Key, Val>::clear(bool shrinkArray) {
for (int ctr = 0; ctr < _arrsize; ctr++) { for (uint ctr = 0; ctr < _arrsize; ctr++) {
if (_arr[ctr] != NULL) { if (_arr[ctr] != NULL) {
delete _arr[ctr]; delete _arr[ctr];
_arr[ctr] = NULL; _arr[ctr] = NULL;
} }
} }
if (shrinkArray && _arrsize > INIT_SIZE) { if (shrinkArray && _arrsize > nextTableSize(0)) {
delete _arr; delete[] _arr;
_arr = new aa_ref_t *[INIT_SIZE]; _arrsize = nextTableSize(0);
_arrsize = INIT_SIZE; _arr = new aa_ref_t *[_arrsize];
assert(_arr != NULL);
for (int ctr = 0; ctr < _arrsize; ctr++) for (uint ctr = 0; ctr < _arrsize; ctr++)
_arr[ctr] = NULL; _arr[ctr] = NULL;
} }
@ -260,19 +278,14 @@ void AssocArray<Key, Val>::clear(bool shrinkArray) {
template <class Key, class Val> template <class Key, class Val>
void AssocArray<Key, Val>::expand_array(void) { void AssocArray<Key, Val>::expand_array(void) {
aa_ref_t **old_arr; aa_ref_t **old_arr;
int old_arrsize, old_nele, ctr, dex; uint old_arrsize, old_nele, ctr, dex;
old_nele = _nele; old_nele = _nele;
old_arr = _arr; old_arr = _arr;
old_arrsize = _arrsize; old_arrsize = _arrsize;
// GROWTH_FACTOR 1.531415936535
// allocate a new array // allocate a new array
_arrsize = 153 * old_arrsize / 100; _arrsize = nextTableSize(old_arrsize);
// Ensure that _arrsize is odd.
_arrsize |= 1;
_arr = new aa_ref_t *[_arrsize]; _arr = new aa_ref_t *[_arrsize];
assert(_arr != NULL); assert(_arr != NULL);
@ -307,12 +320,19 @@ void AssocArray<Key, Val>::expand_array(void) {
template <class Key, class Val> template <class Key, class Val>
Val &AssocArray<Key, Val>::operator [](const Key &key) { Val &AssocArray<Key, Val>::operator [](const Key &key) {
int ctr = lookup(key); uint ctr = lookup(key);
if (_arr[ctr] == NULL) { if (_arr[ctr] == NULL) {
_arr[ctr] = new aa_ref_t(key); _arr[ctr] = new aa_ref_t(key);
_nele++; _nele++;
#ifdef DEBUG_HASH_COLLISIONS
if (ctr != hashit(key, _arrsize)) {
_collisions++;
// fprintf(stderr, "collisions = %d\n", _collisions);
}
#endif
// Only fill array to fifty percent
if (_nele > _arrsize / 2) { if (_nele > _arrsize / 2) {
expand_array(); expand_array();
ctr = lookup(key); ctr = lookup(key);
@ -329,7 +349,7 @@ const Val &AssocArray<Key, Val>::operator [](const Key &key) const {
template <class Key, class Val> template <class Key, class Val>
const Val &AssocArray<Key, Val>::queryVal(const Key &key) const { const Val &AssocArray<Key, Val>::queryVal(const Key &key) const {
int ctr = lookup(key); uint ctr = lookup(key);
assert(_arr[ctr] != NULL); assert(_arr[ctr] != NULL);
return _arr[ctr]->dat; return _arr[ctr]->dat;
} }