Revised HashMap implementation
svn-id: r34273
This commit is contained in:
parent
155b8606c1
commit
31ce5eb496
4 changed files with 231 additions and 179 deletions
165
common/hashmap.h
165
common/hashmap.h
|
@ -24,32 +24,8 @@
|
|||
*/
|
||||
|
||||
// The hash map (associative array) implementation in this file is
|
||||
// based on code by Andrew Y. Ng, 1996:
|
||||
|
||||
/*
|
||||
* Copyright (c) 1998-2003 Massachusetts Institute of Technology.
|
||||
* This code was developed as part of the Haystack research project
|
||||
* (http://haystack.lcs.mit.edu/). Permission is hereby granted,
|
||||
* free of charge, to any person obtaining a copy of this software
|
||||
* and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute,
|
||||
* sublicense, and/or sell copies of the Software, and to permit
|
||||
* persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
// based on the PyDict implementation of CPython. The erase() method
|
||||
// is based on example code in the Wikipedia article on Hash tables.
|
||||
|
||||
#ifndef COMMON_HASHMAP_H
|
||||
#define COMMON_HASHMAP_H
|
||||
|
@ -74,11 +50,6 @@
|
|||
|
||||
namespace Common {
|
||||
|
||||
// The table sizes ideally are primes. We use a helper function to find
|
||||
// suitable table sizes.
|
||||
uint nextTableSize(uint x);
|
||||
|
||||
|
||||
// Enable the following #define if you want to check how many collisions the
|
||||
// code produces (many collisions indicate either a bad hash function, or a
|
||||
// hash table that is too small).
|
||||
|
@ -113,9 +84,24 @@ public:
|
|||
Node(const Key &key) : _key(key), _value() {}
|
||||
};
|
||||
|
||||
enum {
|
||||
HASHMAP_PERTURB_SHIFT = 5,
|
||||
HASHMAP_MIN_CAPACITY = 16,
|
||||
|
||||
// The quotient of the next two constants controls how much the
|
||||
// internal storage of the hashmap may fill up before being
|
||||
// increased automatically.
|
||||
// Note: the quotient of these two must be between and different
|
||||
// from 0 and 1.
|
||||
HASHMAP_LOADFACTOR_NUMERATOR = 2,
|
||||
HASHMAP_LOADFACTOR_DENOMINATOR = 3,
|
||||
|
||||
HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR
|
||||
};
|
||||
|
||||
|
||||
#ifdef USE_HASHMAP_MEMORY_POOL
|
||||
MemoryPool _nodePool;
|
||||
FixedSizeMemoryPool<sizeof(Node), HASHMAP_MEMORYPOOL_SIZE> _nodePool;
|
||||
|
||||
Node *allocNode(const Key &key) {
|
||||
void* mem = _nodePool.malloc();
|
||||
|
@ -137,7 +123,7 @@ public:
|
|||
#endif
|
||||
|
||||
Node **_storage; // hashtable of size arrsize.
|
||||
uint _capacity;
|
||||
uint _mask; /**< Capacity of the HashMap minus one; must be a power of two of minus one */
|
||||
uint _size;
|
||||
|
||||
HashFunc _hash;
|
||||
|
@ -153,7 +139,7 @@ public:
|
|||
void assign(const HM_t &map);
|
||||
int lookup(const Key &key) const;
|
||||
int lookupAndCreateIfMissing(const Key &key);
|
||||
void expand_array(uint newsize);
|
||||
void expandStorage(uint newCapacity);
|
||||
|
||||
template<class T> friend class IteratorImpl;
|
||||
|
||||
|
@ -175,7 +161,7 @@ public:
|
|||
|
||||
NodeType *deref() const {
|
||||
assert(_hashmap != 0);
|
||||
assert(_idx < _hashmap->_capacity);
|
||||
assert(_idx <= _hashmap->_mask);
|
||||
Node *node = _hashmap->_storage[_idx];
|
||||
assert(node != 0);
|
||||
return node;
|
||||
|
@ -196,8 +182,8 @@ public:
|
|||
assert(_hashmap);
|
||||
do {
|
||||
_idx++;
|
||||
} while (_idx < _hashmap->_capacity && _hashmap->_storage[_idx] == 0);
|
||||
if (_idx >= _hashmap->_capacity)
|
||||
} while (_idx <= _hashmap->_mask && _hashmap->_storage[_idx] == 0);
|
||||
if (_idx > _hashmap->_mask)
|
||||
_idx = (uint)-1;
|
||||
|
||||
return *this;
|
||||
|
@ -247,7 +233,7 @@ public:
|
|||
|
||||
iterator begin() {
|
||||
// Find and return the _key non-empty entry
|
||||
for (uint ctr = 0; ctr < _capacity; ++ctr) {
|
||||
for (uint ctr = 0; ctr <= _mask; ++ctr) {
|
||||
if (_storage[ctr])
|
||||
return iterator(ctr, this);
|
||||
}
|
||||
|
@ -259,7 +245,7 @@ public:
|
|||
|
||||
const_iterator begin() const {
|
||||
// Find and return the first non-empty entry
|
||||
for (uint ctr = 0; ctr < _capacity; ++ctr) {
|
||||
for (uint ctr = 0; ctr <= _mask; ++ctr) {
|
||||
if (_storage[ctr])
|
||||
return const_iterator(ctr, this);
|
||||
}
|
||||
|
@ -298,14 +284,11 @@ public:
|
|||
*/
|
||||
template<class Key, class Val, class HashFunc, class EqualFunc>
|
||||
HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() :
|
||||
#ifdef USE_HASHMAP_MEMORY_POOL
|
||||
_nodePool(sizeof(Node)),
|
||||
#endif
|
||||
_defaultVal() {
|
||||
_capacity = nextTableSize(0);
|
||||
_storage = new Node *[_capacity];
|
||||
_mask = HASHMAP_MIN_CAPACITY - 1;
|
||||
_storage = new Node *[HASHMAP_MIN_CAPACITY];
|
||||
assert(_storage != NULL);
|
||||
memset(_storage, 0, _capacity * sizeof(Node *));
|
||||
memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
|
||||
|
||||
_size = 0;
|
||||
|
||||
|
@ -322,9 +305,6 @@ HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() :
|
|||
*/
|
||||
template<class Key, class Val, class HashFunc, class EqualFunc>
|
||||
HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) :
|
||||
#ifdef USE_HASHMAP_MEMORY_POOL
|
||||
_nodePool(sizeof(Node)),
|
||||
#endif
|
||||
_defaultVal() {
|
||||
assign(map);
|
||||
}
|
||||
|
@ -334,14 +314,14 @@ HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) :
|
|||
*/
|
||||
template<class Key, class Val, class HashFunc, class EqualFunc>
|
||||
HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() {
|
||||
for (uint ctr = 0; ctr < _capacity; ++ctr)
|
||||
for (uint ctr = 0; ctr <= _mask; ++ctr)
|
||||
if (_storage[ctr] != NULL)
|
||||
freeNode(_storage[ctr]);
|
||||
|
||||
delete[] _storage;
|
||||
#ifdef DEBUG_HASH_COLLISIONS
|
||||
extern void updateHashCollisionStats(int, int, int, int);
|
||||
updateHashCollisionStats(_collisions, _lookups, _capacity, _size);
|
||||
updateHashCollisionStats(_collisions, _lookups, _mask+1, _size);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -354,14 +334,14 @@ HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() {
|
|||
*/
|
||||
template<class Key, class Val, class HashFunc, class EqualFunc>
|
||||
void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) {
|
||||
_capacity = map._capacity;
|
||||
_storage = new Node *[_capacity];
|
||||
_mask = map._mask;
|
||||
_storage = new Node *[_mask+1];
|
||||
assert(_storage != NULL);
|
||||
memset(_storage, 0, _capacity * sizeof(Node *));
|
||||
memset(_storage, 0, (_mask+1) * sizeof(Node *));
|
||||
|
||||
// Simply clone the map given to us, one by one.
|
||||
_size = 0;
|
||||
for (uint ctr = 0; ctr < _capacity; ++ctr) {
|
||||
for (uint ctr = 0; ctr <= _mask; ++ctr) {
|
||||
if (map._storage[ctr] != NULL) {
|
||||
_storage[ctr] = allocNode(map._storage[ctr]->_key);
|
||||
_storage[ctr]->_value = map._storage[ctr]->_value;
|
||||
|
@ -375,43 +355,46 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) {
|
|||
|
||||
template<class Key, class Val, class HashFunc, class EqualFunc>
|
||||
void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) {
|
||||
for (uint ctr = 0; ctr < _capacity; ++ctr) {
|
||||
for (uint ctr = 0; ctr <= _mask; ++ctr) {
|
||||
if (_storage[ctr] != NULL) {
|
||||
freeNode(_storage[ctr]);
|
||||
_storage[ctr] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (shrinkArray && _capacity > nextTableSize(0)) {
|
||||
#ifdef USE_HASHMAP_MEMORY_POOL
|
||||
_nodePool.freeUnusedPages();
|
||||
#endif
|
||||
|
||||
if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) {
|
||||
delete[] _storage;
|
||||
|
||||
_capacity = nextTableSize(0);
|
||||
_storage = new Node *[_capacity];
|
||||
_mask = HASHMAP_MIN_CAPACITY;
|
||||
_storage = new Node *[HASHMAP_MIN_CAPACITY];
|
||||
assert(_storage != NULL);
|
||||
memset(_storage, 0, _capacity * sizeof(Node *));
|
||||
memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
|
||||
}
|
||||
|
||||
_size = 0;
|
||||
}
|
||||
|
||||
template<class Key, class Val, class HashFunc, class EqualFunc>
|
||||
void HashMap<Key, Val, HashFunc, EqualFunc>::expand_array(uint newsize) {
|
||||
assert(newsize > _capacity);
|
||||
uint ctr, dex;
|
||||
void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(uint newCapacity) {
|
||||
assert(newCapacity > _mask+1);
|
||||
|
||||
const uint old_size = _size;
|
||||
const uint old_capacity = _capacity;
|
||||
const uint old_mask = _mask;
|
||||
Node **old_storage = _storage;
|
||||
|
||||
// allocate a new array
|
||||
_size = 0;
|
||||
_capacity = newsize;
|
||||
_storage = new Node *[_capacity];
|
||||
_mask = newCapacity - 1;
|
||||
_storage = new Node *[newCapacity];
|
||||
assert(_storage != NULL);
|
||||
memset(_storage, 0, _capacity * sizeof(Node *));
|
||||
memset(_storage, 0, newCapacity * sizeof(Node *));
|
||||
|
||||
// rehash all the old elements
|
||||
for (ctr = 0; ctr < old_capacity; ++ctr) {
|
||||
for (uint ctr = 0; ctr <= old_mask; ++ctr) {
|
||||
if (old_storage[ctr] == NULL)
|
||||
continue;
|
||||
|
||||
|
@ -419,12 +402,13 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::expand_array(uint newsize) {
|
|||
// Since we know that no key exists twice in the old table, we
|
||||
// can do this slightly better than by calling lookup, since we
|
||||
// don't have to call _equal().
|
||||
dex = _hash(old_storage[ctr]->_key) % _capacity;
|
||||
while (_storage[dex] != NULL) {
|
||||
dex = (dex + 1) % _capacity;
|
||||
const uint hash = _hash(old_storage[ctr]->_key);
|
||||
uint idx = hash & _mask;
|
||||
for (uint perturb = hash; _storage[idx] != NULL; perturb >>= HASHMAP_PERTURB_SHIFT) {
|
||||
idx = (5 * idx + perturb + 1) & _mask;
|
||||
}
|
||||
|
||||
_storage[dex] = old_storage[ctr];
|
||||
_storage[idx] = old_storage[ctr];
|
||||
_size++;
|
||||
}
|
||||
|
||||
|
@ -439,10 +423,13 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::expand_array(uint newsize) {
|
|||
|
||||
template<class Key, class Val, class HashFunc, class EqualFunc>
|
||||
int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
|
||||
uint ctr = _hash(key) % _capacity;
|
||||
const uint hash = _hash(key);
|
||||
uint ctr = hash & _mask;
|
||||
for (uint perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
|
||||
if (_storage[ctr] == NULL || _equal(_storage[ctr]->_key, key))
|
||||
break;
|
||||
|
||||
while (_storage[ctr] != NULL && !_equal(_storage[ctr]->_key, key)) {
|
||||
ctr = (ctr + 1) % _capacity;
|
||||
ctr = (5 * ctr + perturb + 1) & _mask;
|
||||
|
||||
#ifdef DEBUG_HASH_COLLISIONS
|
||||
_collisions++;
|
||||
|
@ -453,7 +440,7 @@ int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
|
|||
_lookups++;
|
||||
fprintf(stderr, "collisions %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n",
|
||||
_collisions, _lookups, ((double) _collisions / (double)_lookups),
|
||||
(const void *)this, _capacity, _size);
|
||||
(const void *)this, _mask+1, _size);
|
||||
#endif
|
||||
|
||||
return ctr;
|
||||
|
@ -467,9 +454,11 @@ int HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key &
|
|||
_storage[ctr] = allocNode(key);
|
||||
_size++;
|
||||
|
||||
// Keep the load factor below 75%.
|
||||
if (_size > _capacity * 75 / 100) {
|
||||
expand_array(nextTableSize(_capacity));
|
||||
// Keep the load factor below a certain threshold.
|
||||
uint capacity = _mask + 1;
|
||||
if (_size * HASHMAP_LOADFACTOR_DENOMINATOR > capacity * HASHMAP_LOADFACTOR_NUMERATOR) {
|
||||
capacity = capacity < 500 ? (capacity * 4) : (capacity * 2);
|
||||
expandStorage(capacity);
|
||||
ctr = lookup(key);
|
||||
}
|
||||
}
|
||||
|
@ -520,23 +509,35 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &v
|
|||
template<class Key, class Val, class HashFunc, class EqualFunc>
|
||||
void HashMap<Key, Val, HashFunc, EqualFunc>::erase(const Key &key) {
|
||||
// This is based on code in the Wikipedia article on Hash tables.
|
||||
uint i = lookup(key);
|
||||
|
||||
const uint hash = _hash(key);
|
||||
uint i = hash & _mask;
|
||||
uint perturb;
|
||||
|
||||
for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
|
||||
if (_storage[i] == NULL || _equal(_storage[i]->_key, key))
|
||||
break;
|
||||
|
||||
i = (5 * i + perturb + 1) & _mask;
|
||||
}
|
||||
|
||||
if (_storage[i] == NULL)
|
||||
return; // key wasn't present, so no work has to be done
|
||||
|
||||
// If we remove a key, we must check all subsequent keys and possibly
|
||||
// reinsert them.
|
||||
uint j = i;
|
||||
freeNode(_storage[i]);
|
||||
_storage[i] = NULL;
|
||||
while (true) {
|
||||
for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
|
||||
// Look at the next table slot
|
||||
j = (j + 1) % _capacity;
|
||||
j = (5 * j + perturb + 1) & _mask;
|
||||
// If the next slot is empty, we are done
|
||||
if (_storage[j] == NULL)
|
||||
break;
|
||||
// Compute the slot where the content of the next slot should normally be,
|
||||
// assuming an empty table, and check whether we have to move it.
|
||||
uint k = _hash(_storage[j]->_key) % _capacity;
|
||||
uint k = _hash(_storage[j]->_key) & _mask;
|
||||
if ((j > i && (k <= i || k > j)) ||
|
||||
(j < i && (k <= i && k > j)) ) {
|
||||
_storage[i] = _storage[j];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue