1. 本文参考SGI STL中Hash Table的实现以及《STL源码剖析》;
2. Hash Table可提供对任何有名项的存取操作和删除操作。由于操作对象是有名项,所以Hash Table也可被视为一种字典结构。这种结构的用意在于提供常数时间之基本操作;
3. 学习Hash Table的初衷来自于面试一家公司的时候问到这个问题,本来以为本科的时候已经学的不错了,就没复习,没想到被面试官一问,各种不知道……囧……还得继续埋头学习。
4. 这里有一个比较重要的策略要注意,也是我以前学习的时候忽略掉的:当元素的个数(包括新增的元素)大于bucket vector的大小时,表格需要重建,新的bucket vector大小是__stl_prime_list中当前vector大小后面那个数字,用__stl_next_prime来查找。这个质数表有个特点,除了第一个数字,其他每一个数字都大约是前一个的两倍。
以下是源代码(为了便于理解学习,程序没有使用template,以size_t为例):
#ifndef _STAN_SEPARATE_CHAINING_H_
#define _STAN_SEPARATE_CHAINING_H_

#include <cstddef>
#include <vector>

// Kept only so that files which include this header and rely on the directive
// keep compiling; the code below qualifies the standard names it uses.
using namespace std;

// Singly linked node used for the per-bucket collision chains.
typedef struct _Hashtable_node
{
    struct _Hashtable_node* next;  // next node in the same bucket, 0 at the end
    size_t val;                    // stored value (also used as the hash key)
} node, *pnode;

// Note: assumes long is at least 32 bits.
enum { __stl_num_primes = 28 };

// Binary search over the ascending range [first, last).
// Returns a pointer to the first element that is not less than `value`, or a
// pointer equal to `last` when every element is smaller. The range is only
// read, never written.
// Declared `inline` because this definition lives in a header: without it,
// including the header from two translation units is a multiple-definition
// link error.
inline unsigned long* lower_bound(unsigned long* first, const unsigned long* last, const size_t& value)
{
    size_t len = last - first;
    while (len > 0)
    {
        const size_t half = len >> 1;
        unsigned long* const middle = first + half;
        if (*middle < value)
        {
            // Everything up to and including `middle` is too small.
            first = middle + 1;
            len = len - half - 1;
        }
        else
        {
            len = half;
        }
    }
    return first;
}

// Candidate bucket counts in ascending order. Apart from the first entries,
// each value is roughly twice the previous one.
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
    53ul,         97ul,         193ul,       389ul,       769ul,
    1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
    49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
    1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
    50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
    1610612741ul, 3221225473ul, 4294967291ul
};

// Returns the smallest entry of __stl_prime_list that is >= __n, or the
// largest entry when __n exceeds every value in the table.
inline unsigned long __stl_next_prime(unsigned long __n)
{
    const unsigned long* const first = __stl_prime_list;
    const unsigned long* const last = __stl_prime_list + __stl_num_primes;
    // Explicitly call the helper above: an unqualified call could resolve to
    // std::lower_bound because of the using-directive. The const_cast only
    // satisfies the helper's signature; the table is never modified.
    const unsigned long* const pos =
        ::lower_bound(const_cast<unsigned long*>(first), last, __n);
    return pos == last ? *(last - 1) : *pos;
}

// Separate-chaining hash table of size_t values.
// A value's bucket is `value % bucket_count()`. The table owns every node it
// allocates: nodes are released by erase(), clear() and the destructor, and
// copies of a table are deep copies that share no nodes with the source.
class hashtable
{
public:
    // Creates an empty table whose bucket count is the first listed prime
    // that is >= n.
    hashtable(const size_t n)
        : num_elements(0),
          buckets(__stl_next_prime(n), static_cast<pnode>(0))
    {
    }

    // Deep copy: same bucket count, same values, same order inside each chain.
    hashtable(const hashtable& other)
        : num_elements(0),
          buckets(other.buckets.size(), static_cast<pnode>(0))
    {
        try
        {
            copy_nodes_from(other);
        }
        catch (...)
        {
            // The destructor does not run for a partially constructed object,
            // so release the nodes copied so far before propagating.
            clear();
            throw;
        }
    }

    // Copy-and-swap: *this is left untouched if copying `other` throws.
    hashtable& operator=(const hashtable& other)
    {
        if (this != &other)
        {
            hashtable tmp(other);
            swap(tmp);
        }
        return *this;
    }

    // Releases every node still stored in the table.
    ~hashtable()
    {
        clear();
    }

    // Exchanges the contents of two tables without copying any node.
    void swap(hashtable& other)
    {
        buckets.swap(other.buckets);
        const size_t tmp = num_elements;
        num_elements = other.num_elements;
        other.num_elements = tmp;
    }

    // Removes and frees all elements; the bucket count is unchanged.
    void clear()
    {
        for (size_t i = 0; i < buckets.size(); ++i)
        {
            pnode cur = buckets[i];
            while (cur)
            {
                const pnode next = cur->next;
                delete_node(cur);
                cur = next;
            }
            buckets[i] = 0;
        }
        num_elements = 0;
    }

    // Allocates a detached node holding `obj`.
    node* new_node(const size_t& obj)
    {
        node* n = new node();
        n->next = 0;
        n->val = obj;
        return n;
    }

    // Frees a node previously returned by new_node().
    void delete_node(node* n)
    {
        delete n;
    }

    // Index of the bucket `obj` belongs to in a table of `n` buckets (n != 0).
    size_t bkt_num(const size_t& obj, size_t n) const
    {
        return obj % n;
    }

    // Inserts `obj` unless an equal value is already stored.
    void insert_unique(const size_t& obj)
    {
        resize(num_elements + 1);
        insert_unique_noresize(obj);
    }

    // Grows the bucket vector when `num_elements_hint` exceeds the current
    // bucket count. The new count is the first listed prime >= the hint.
    // Existing nodes are relinked into the new buckets, not reallocated.
    void resize(size_t num_elements_hint)
    {
        const size_t old_n = buckets.size();
        if (num_elements_hint <= old_n)
        {
            return;
        }
        const size_t n = __stl_next_prime(num_elements_hint);
        if (n <= old_n)
        {
            return;  // already at the largest size the prime table offers
        }
        // Allocate the new bucket array first: if this throws, the table is
        // still intact. The relinking below cannot throw.
        std::vector<pnode> tmp(n, static_cast<pnode>(0));
        for (size_t bucket = 0; bucket < old_n; ++bucket)
        {
            while (pnode first = buckets[bucket])
            {
                const size_t new_bucket = bkt_num(first->val, n);
                buckets[bucket] = first->next;
                first->next = tmp[new_bucket];
                tmp[new_bucket] = first;
            }
        }
        buckets.swap(tmp);
    }

    // insert_unique() without the growth check.
    void insert_unique_noresize(const size_t& obj)
    {
        const size_t n = bkt_num(obj, buckets.size());
        const pnode first = buckets[n];
        for (pnode cur = first; cur; cur = cur->next)
        {
            if (cur->val == obj)
            {
                return;  // already present
            }
        }
        const pnode tmp = new_node(obj);
        tmp->next = first;
        buckets[n] = tmp;
        ++num_elements;
    }

    // Inserts `obj` even when an equal value is already stored.
    void insert_equal(const size_t& obj)
    {
        resize(num_elements + 1);
        insert_equal_noresize(obj);
    }

    // insert_equal() without the growth check. A duplicate is linked directly
    // after the first equal node so equal values stay adjacent in the chain;
    // otherwise the new node becomes the head of its bucket.
    void insert_equal_noresize(const size_t& obj)
    {
        const size_t n = bkt_num(obj, buckets.size());
        const pnode first = buckets[n];
        for (pnode cur = first; cur; cur = cur->next)
        {
            if (cur->val == obj)
            {
                node* tmp = new_node(obj);
                tmp->next = cur->next;
                cur->next = tmp;
                ++num_elements;
                return;
            }
        }
        node* tmp = new_node(obj);
        tmp->next = first;
        buckets[n] = tmp;
        ++num_elements;
    }

    // Returns the first node whose value equals `obj`, or 0 if there is none.
    node* find(const size_t& obj)
    {
        const size_t n = bkt_num(obj, buckets.size());
        node* cur = buckets[n];
        while (cur && !(cur->val == obj))
        {
            cur = cur->next;
        }
        return cur;
    }

    // Removes every element equal to `obj` and returns how many were removed.
    size_t erase(const size_t& obj)
    {
        // Copy the key first: `obj` may refer to the `val` of a node that is
        // about to be freed (e.g. erase(find(x)->val)).
        const size_t key = obj;
        const size_t n = bkt_num(key, buckets.size());
        size_t erased = 0;
        // `link` always points at the pointer that refers to the current
        // node, so head and interior nodes are unlinked the same way.
        pnode* link = &buckets[n];
        while (*link)
        {
            if ((*link)->val == key)
            {
                const pnode victim = *link;
                *link = victim->next;
                delete_node(victim);
                ++erased;
                --num_elements;
            }
            else
            {
                link = &(*link)->next;
            }
        }
        return erased;
    }

    // Number of buckets currently allocated.
    size_t bucket_count() const
    {
        return buckets.size();
    }

    // Number of stored elements (duplicates counted individually).
    size_t size() const
    {
        return num_elements;
    }

private:
    // Appends a copy of every node of `other` to the matching bucket of this
    // table. Expects this table to be empty with the same bucket count.
    void copy_nodes_from(const hashtable& other)
    {
        for (size_t i = 0; i < other.buckets.size(); ++i)
        {
            pnode* tail = &buckets[i];
            for (const node* cur = other.buckets[i]; cur; cur = cur->next)
            {
                *tail = new_node(cur->val);
                tail = &(*tail)->next;
                ++num_elements;
            }
        }
    }

    size_t num_elements;         // number of nodes stored across all buckets
    std::vector<pnode> buckets;  // head pointer of each chain, 0 when empty
};

#endif