- replaced the hash table size heuristic with a table of hard coded table sizes
(taken from the GNU ISO C++ Library), which are all prime - replaced the string hash function by one that works slightly better & faster - changed various types to unsigned - added code to help debug the number of hash collisions (off by default) svn-id: r21431
This commit is contained in:
parent
7307c4cb3d
commit
9dc07c11cd
2 changed files with 92 additions and 61 deletions
|
@ -60,57 +60,68 @@
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
|
||||||
// int:
|
// int:
|
||||||
int hashit(int x, int hashsize) {
|
uint hashit(int x, uint hashsize) {
|
||||||
return x % hashsize;
|
return x % hashsize;
|
||||||
}
|
}
|
||||||
|
|
||||||
int data_eq(int x, int y) {
|
bool data_eq(int x, int y) {
|
||||||
return x == y;
|
return x == y;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
// double:
|
// double:
|
||||||
int hashit(double d, int hashsize) {
|
uint hashit(double d, uint hashsize) {
|
||||||
int hash, dex;
|
TODO
|
||||||
byte *p = (byte *)&d;
|
|
||||||
|
|
||||||
hash = 0;
|
|
||||||
|
|
||||||
for (dex = 0; dex < sizeof(double); dex++)
|
|
||||||
hash = ((hash << 8) + p[dex]) % hashsize;
|
|
||||||
|
|
||||||
return hash;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int data_eq(double d1, double d2) {
|
bool data_eq(double d1, double d2) {
|
||||||
return (d1 == d2);
|
return (d1 == d2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// const char *:
|
// const char *:
|
||||||
int hashit(const char *str, int hashsize) {
|
uint hashit(const char *str, uint hashsize) {
|
||||||
const byte *p = (const byte *)str;
|
const byte *p = (const byte *)str;
|
||||||
int hash, dex;
|
uint hash;
|
||||||
|
char c;
|
||||||
|
|
||||||
|
// my31 algo
|
||||||
hash = 0;
|
hash = 0;
|
||||||
|
while ((c = *p++))
|
||||||
|
hash = (hash * 31 + c);
|
||||||
|
|
||||||
for (dex = 0; p[dex] != 0; dex++)
|
return hash % hashsize;
|
||||||
hash = ((hash << 8) + p[dex]) % hashsize;
|
|
||||||
|
|
||||||
return hash;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int data_eq(const char *str1, const char *str2) {
|
bool data_eq(const char *str1, const char *str2) {
|
||||||
return !strcmp(str1, str2);
|
return !strcmp(str1, str2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// String:
|
// String:
|
||||||
int hashit(const Common::String &str, int hashsize) {
|
uint hashit(const Common::String &str, uint hashsize) {
|
||||||
return hashit(str.c_str(), hashsize);
|
return hashit(str.c_str(), hashsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
int data_eq(const Common::String &str1, const String &str2) {
|
bool data_eq(const Common::String &str1, const String &str2) {
|
||||||
return (str1 == str2);
|
return (str1 == str2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The following table is taken from the GNU ISO C++ Library's hashtable.h file.
|
||||||
|
static const uint primes[] = {
|
||||||
|
53ul, 97ul, 193ul, 389ul, 769ul,
|
||||||
|
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
|
||||||
|
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
|
||||||
|
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
|
||||||
|
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
|
||||||
|
1610612741ul, 3221225473ul, 4294967291ul
|
||||||
|
};
|
||||||
|
|
||||||
|
uint nextTableSize(uint x) {
|
||||||
|
int i = 0;
|
||||||
|
while (x >= primes[i])
|
||||||
|
i++;
|
||||||
|
return primes[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
} // End of namespace Common
|
} // End of namespace Common
|
||||||
|
|
|
@ -64,8 +64,6 @@
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
|
||||||
#define INIT_SIZE 11
|
|
||||||
|
|
||||||
typedef Common::String String;
|
typedef Common::String String;
|
||||||
|
|
||||||
// If aa is an AssocArray<Key,Val>, then space is allocated each
|
// If aa is an AssocArray<Key,Val>, then space is allocated each
|
||||||
|
@ -80,14 +78,25 @@ typedef Common::String String;
|
||||||
// be considered equal. Also, we assume that "=" works
|
// be considered equal. Also, we assume that "=" works
|
||||||
// on Val's for assignment.
|
// on Val's for assignment.
|
||||||
|
|
||||||
int hashit(int x, int hashsize);
|
uint hashit(int x, uint hashsize);
|
||||||
int data_eq(int x, int y);
|
bool data_eq(int x, int y);
|
||||||
int hashit(double x, int hashsize);
|
uint hashit(double x, uint hashsize);
|
||||||
int data_eq(double x, double y);
|
bool data_eq(double x, double y);
|
||||||
int hashit(const char *str, int hashsize);
|
uint hashit(const char *str, uint hashsize);
|
||||||
int data_eq(const char *str1, const char *str2);
|
bool data_eq(const char *str1, const char *str2);
|
||||||
int hashit(const String &str, int hashsize);
|
uint hashit(const String &str, uint hashsize);
|
||||||
int data_eq(const String &str1, const String &str2);
|
bool data_eq(const String &str1, const String &str2);
|
||||||
|
|
||||||
|
|
||||||
|
// The table sizes ideally are primes. We use a helper function to find
|
||||||
|
// suitable table sizes.
|
||||||
|
uint nextTableSize(uint x);
|
||||||
|
|
||||||
|
|
||||||
|
// Enable the following #define if you want to check how many collisions the
|
||||||
|
// code produces (many collisions indicate either a bad hash function, or a
|
||||||
|
// hash table that is too small).
|
||||||
|
//#define DEBUG_HASH_COLLISIONS
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
class AssocArray {
|
class AssocArray {
|
||||||
|
@ -101,7 +110,11 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
aa_ref_t **_arr; // hashtable of size arrsize.
|
aa_ref_t **_arr; // hashtable of size arrsize.
|
||||||
int _arrsize, _nele;
|
uint _arrsize, _nele;
|
||||||
|
|
||||||
|
#ifdef DEBUG_HASH_COLLISIONS
|
||||||
|
mutable int _collisions;
|
||||||
|
#endif
|
||||||
|
|
||||||
int lookup(const Key &key) const;
|
int lookup(const Key &key) const;
|
||||||
void expand_array(void);
|
void expand_array(void);
|
||||||
|
@ -137,9 +150,7 @@ public:
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
int AssocArray<Key, Val>::lookup(const Key &key) const {
|
int AssocArray<Key, Val>::lookup(const Key &key) const {
|
||||||
int ctr;
|
uint ctr = hashit(key, _arrsize);
|
||||||
|
|
||||||
ctr = hashit(key, _arrsize);
|
|
||||||
|
|
||||||
while (_arr[ctr] != NULL && !data_eq(_arr[ctr]->key, key)) {
|
while (_arr[ctr] != NULL && !data_eq(_arr[ctr]->key, key)) {
|
||||||
ctr++;
|
ctr++;
|
||||||
|
@ -148,19 +159,23 @@ int AssocArray<Key, Val>::lookup(const Key &key) const {
|
||||||
ctr = 0;
|
ctr = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG_HASH_COLLISIONS
|
||||||
|
fprintf(stderr, "collisions = %d in AssocArray %p\n", _collisions, (const void *)this);
|
||||||
|
#endif
|
||||||
|
|
||||||
return ctr;
|
return ctr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
bool AssocArray<Key, Val>::contains(const Key &key) const {
|
bool AssocArray<Key, Val>::contains(const Key &key) const {
|
||||||
int ctr = lookup(key);
|
uint ctr = lookup(key);
|
||||||
return (_arr[ctr] != NULL);
|
return (_arr[ctr] != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
Key *AssocArray<Key, Val>::new_all_keys(void) const {
|
Key *AssocArray<Key, Val>::new_all_keys(void) const {
|
||||||
Key *all_keys;
|
Key *all_keys;
|
||||||
int ctr, dex;
|
uint ctr, dex;
|
||||||
|
|
||||||
if (_nele == 0)
|
if (_nele == 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -186,7 +201,7 @@ Key *AssocArray<Key, Val>::new_all_keys(void) const {
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
Val *AssocArray<Key, Val>::new_all_values(void) const {
|
Val *AssocArray<Key, Val>::new_all_values(void) const {
|
||||||
Val *all_values;
|
Val *all_values;
|
||||||
int ctr, dex;
|
uint ctr, dex;
|
||||||
|
|
||||||
if (_nele == 0)
|
if (_nele == 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -212,21 +227,24 @@ Val *AssocArray<Key, Val>::new_all_values(void) const {
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
AssocArray<Key, Val>::AssocArray() {
|
AssocArray<Key, Val>::AssocArray() {
|
||||||
int ctr;
|
uint ctr;
|
||||||
|
|
||||||
_arr = new aa_ref_t *[INIT_SIZE];
|
_arrsize = nextTableSize(0);
|
||||||
|
_arr = new aa_ref_t *[_arrsize];
|
||||||
assert(_arr != NULL);
|
assert(_arr != NULL);
|
||||||
|
for (ctr = 0; ctr < _arrsize; ctr++)
|
||||||
for (ctr = 0; ctr < INIT_SIZE; ctr++)
|
|
||||||
_arr[ctr] = NULL;
|
_arr[ctr] = NULL;
|
||||||
|
|
||||||
_arrsize = INIT_SIZE;
|
|
||||||
_nele = 0;
|
_nele = 0;
|
||||||
|
|
||||||
|
#ifdef DEBUG_HASH_COLLISIONS
|
||||||
|
_collisions = 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
AssocArray<Key, Val>::~AssocArray() {
|
AssocArray<Key, Val>::~AssocArray() {
|
||||||
int ctr;
|
uint ctr;
|
||||||
|
|
||||||
for (ctr = 0; ctr < _arrsize; ctr++)
|
for (ctr = 0; ctr < _arrsize; ctr++)
|
||||||
if (_arr[ctr] != NULL)
|
if (_arr[ctr] != NULL)
|
||||||
|
@ -237,20 +255,20 @@ AssocArray<Key, Val>::~AssocArray() {
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
void AssocArray<Key, Val>::clear(bool shrinkArray) {
|
void AssocArray<Key, Val>::clear(bool shrinkArray) {
|
||||||
for (int ctr = 0; ctr < _arrsize; ctr++) {
|
for (uint ctr = 0; ctr < _arrsize; ctr++) {
|
||||||
if (_arr[ctr] != NULL) {
|
if (_arr[ctr] != NULL) {
|
||||||
delete _arr[ctr];
|
delete _arr[ctr];
|
||||||
_arr[ctr] = NULL;
|
_arr[ctr] = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shrinkArray && _arrsize > INIT_SIZE) {
|
if (shrinkArray && _arrsize > nextTableSize(0)) {
|
||||||
delete _arr;
|
delete[] _arr;
|
||||||
|
|
||||||
_arr = new aa_ref_t *[INIT_SIZE];
|
_arrsize = nextTableSize(0);
|
||||||
_arrsize = INIT_SIZE;
|
_arr = new aa_ref_t *[_arrsize];
|
||||||
|
assert(_arr != NULL);
|
||||||
for (int ctr = 0; ctr < _arrsize; ctr++)
|
for (uint ctr = 0; ctr < _arrsize; ctr++)
|
||||||
_arr[ctr] = NULL;
|
_arr[ctr] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -260,19 +278,14 @@ void AssocArray<Key, Val>::clear(bool shrinkArray) {
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
void AssocArray<Key, Val>::expand_array(void) {
|
void AssocArray<Key, Val>::expand_array(void) {
|
||||||
aa_ref_t **old_arr;
|
aa_ref_t **old_arr;
|
||||||
int old_arrsize, old_nele, ctr, dex;
|
uint old_arrsize, old_nele, ctr, dex;
|
||||||
|
|
||||||
old_nele = _nele;
|
old_nele = _nele;
|
||||||
old_arr = _arr;
|
old_arr = _arr;
|
||||||
old_arrsize = _arrsize;
|
old_arrsize = _arrsize;
|
||||||
|
|
||||||
// GROWTH_FACTOR 1.531415936535
|
|
||||||
// allocate a new array
|
// allocate a new array
|
||||||
_arrsize = 153 * old_arrsize / 100;
|
_arrsize = nextTableSize(old_arrsize);
|
||||||
|
|
||||||
// Ensure that _arrsize is odd.
|
|
||||||
_arrsize |= 1;
|
|
||||||
|
|
||||||
_arr = new aa_ref_t *[_arrsize];
|
_arr = new aa_ref_t *[_arrsize];
|
||||||
|
|
||||||
assert(_arr != NULL);
|
assert(_arr != NULL);
|
||||||
|
@ -307,12 +320,19 @@ void AssocArray<Key, Val>::expand_array(void) {
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
Val &AssocArray<Key, Val>::operator [](const Key &key) {
|
Val &AssocArray<Key, Val>::operator [](const Key &key) {
|
||||||
int ctr = lookup(key);
|
uint ctr = lookup(key);
|
||||||
|
|
||||||
if (_arr[ctr] == NULL) {
|
if (_arr[ctr] == NULL) {
|
||||||
_arr[ctr] = new aa_ref_t(key);
|
_arr[ctr] = new aa_ref_t(key);
|
||||||
_nele++;
|
_nele++;
|
||||||
|
|
||||||
|
#ifdef DEBUG_HASH_COLLISIONS
|
||||||
|
if (ctr != hashit(key, _arrsize)) {
|
||||||
|
_collisions++;
|
||||||
|
// fprintf(stderr, "collisions = %d\n", _collisions);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
// Only fill array to fifty percent
|
||||||
if (_nele > _arrsize / 2) {
|
if (_nele > _arrsize / 2) {
|
||||||
expand_array();
|
expand_array();
|
||||||
ctr = lookup(key);
|
ctr = lookup(key);
|
||||||
|
@ -329,7 +349,7 @@ const Val &AssocArray<Key, Val>::operator [](const Key &key) const {
|
||||||
|
|
||||||
template <class Key, class Val>
|
template <class Key, class Val>
|
||||||
const Val &AssocArray<Key, Val>::queryVal(const Key &key) const {
|
const Val &AssocArray<Key, Val>::queryVal(const Key &key) const {
|
||||||
int ctr = lookup(key);
|
uint ctr = lookup(key);
|
||||||
assert(_arr[ctr] != NULL);
|
assert(_arr[ctr] != NULL);
|
||||||
return _arr[ctr]->dat;
|
return _arr[ctr]->dat;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue