Memcached源码分析之Hash表扩容
Hash表是Memcached里面最重要的结构之一,其采用链接法来处理Hash冲突。当Hash表中的项太多时,也就是Hash冲突比较高的时候,Hash表的查找就会退化成对单链表的遍历,此时为了提高Hash的性能,Hash表需要扩容。Memcached的扩容条件是当表中元素个数超过Hash容量的1.5倍时就进行扩容,扩容过程由独立的线程来完成。扩容过程中会同时使用2个Hash表,将老表中的数据通过Hash算法映射到新表中,每次移动的桶的数目可以配置,默认是每次移动老表中的1个桶。
-
- int assoc_insert(item *it, const uint32_t hv) {
- unsigned int oldbucket;
-
- if (expanding &&(oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
- {
- it->h_next = old_hashtable[oldbucket];
- old_hashtable[oldbucket] = it;
- } else {
- it->h_next = primary_hashtable[hv & hashmask(hashpower)];
- primary_hashtable[hv & hashmask(hashpower)] = it;
- }
-
- hash_items++;
-
- if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
- assoc_start_expand();
- }
-
- MEMCACHED_ASSOC_INSERT(ITEM_key(it), it->nkey, hash_items);
- return 1;
- }
-
- static void assoc_start_expand(void) {
- if (started_expanding)
- return;
- started_expanding = true;
- pthread_cond_signal(&maintenance_cond);
- }
-
- static void *assoc_maintenance_thread(void *arg) {
-
- while (do_run_maintenance_thread) {
- int ii = 0;
-
- item_lock_global();
- mutex_lock(&cache_lock);
-
- for (ii = 0; ii < hash_bulk_move && expanding; ++ii) {
- item *it, *next;
- int bucket;
-
- for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
- next = it->h_next;
-
- bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower);
- it->h_next = primary_hashtable[bucket];
- primary_hashtable[bucket] = it;
- }
-
- old_hashtable[expand_bucket] = NULL;
-
- expand_bucket++;
- if (expand_bucket == hashsize(hashpower - 1)) {
- expanding = false;
- free(old_hashtable);
- STATS_LOCK();
- stats.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *);
- stats.hash_is_expanding = 0;
- STATS_UNLOCK();
- if (settings.verbose > 1)
- fprintf(stderr, "Hash table expansion done\n");
- }
- }
-
- mutex_unlock(&cache_lock);
- item_unlock_global();
-
- if (!expanding) {
-
- switch_item_lock_type(ITEM_LOCK_GRANULAR);
- slabs_rebalancer_resume();
-
- mutex_lock(&cache_lock);
- started_expanding = false;
- pthread_cond_wait(&maintenance_cond, &cache_lock);
- mutex_unlock(&cache_lock);
- slabs_rebalancer_pause();
- switch_item_lock_type(ITEM_LOCK_GLOBAL);
- mutex_lock(&cache_lock);
- assoc_expand();
- mutex_unlock(&cache_lock);
- }
- }
- return NULL;
- }
-
- static void assoc_expand(void) {
- old_hashtable = primary_hashtable;
-
- primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
- if (primary_hashtable) {
- if (settings.verbose > 1)
- fprintf(stderr, "Hash table expansion starting\n");
- hashpower++;
- expanding = true;
- expand_bucket = 0;
- STATS_LOCK();
- stats.hash_power_level = hashpower;
- stats.hash_bytes += hashsize(hashpower) * sizeof(void *);
- stats.hash_is_expanding = 1;
- STATS_UNLOCK();
- } else {
- primary_hashtable = old_hashtable;
- }
- }
Memcached源码分析之item结构
item是Memcached中抽象实际数据的结构,我们分析下item的一些特性,便于后续Memcached的其他特性分析。
- typedef struct _stritem {
- struct _stritem *next;
- struct _stritem *prev;
- struct _stritem *h_next;
- rel_time_t time;
- rel_time_t exptime;
- int nbytes;
- unsigned short refcount;
- uint8_t nsuffix;
- uint8_t it_flags;
- uint8_t slabs_clsid;
- uint8_t nkey;
- union {
- uint64_t cas;
- char end;
- } data[];
- } item;
其结构图如下所示:

即Item由两部分组成,item的属性信息和item的数据部分,属性信息解释如上,数据部分包括cas,key和真实的value信息,item在内存中的存储形式如下:

这个图画出了部分结构,还有Hash表的结构没有画出。

这里大概介绍了item的一些信息,后面我们会分析item插入Hash表等信息。
注:本篇博客的图片摘自:http://kenby.iteye.com/blog/1423989
http://www.nosqlnotes.net/archives/222
Memcached源码阅读之get过程
我们在前面分析过,Memcached从网络读取完数据,解析数据,如果是get操作,则执行get操作,下面我们分析下get操作的流程。
-
- item *item_get(const char *key, const size_t nkey) {
- item *it;
- uint32_t hv;
- hv = hash(key, nkey, 0);
- item_lock(hv);
- it = do_item_get(key, nkey, hv);
- item_unlock(hv);
- return it;
- }
-
- void item_lock(uint32_t hv) {
- uint8_t *lock_type = pthread_getspecific(item_lock_type_key);
- if (likely(*lock_type == ITEM_LOCK_GRANULAR)) {
- mutex_lock(&item_locks[(hv & hashmask(hashpower)) % item_lock_count]);
- } else {
- mutex_lock(&item_global_lock);
- }
- }
-
- void item_unlock(uint32_t hv) {
- uint8_t *lock_type = pthread_getspecific(item_lock_type_key);
- if (likely(*lock_type == ITEM_LOCK_GRANULAR)) {
- mutex_unlock(&item_locks[(hv & hashmask(hashpower)) % item_lock_count]);
- } else {
- mutex_unlock(&item_global_lock);
- }
- }
-
- item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
- item *it = assoc_find(key, nkey, hv);
- if (it != NULL) {
- refcount_incr(&it->refcount);
- if (slab_rebalance_signal &&
- ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
- do_item_unlink_nolock(it, hv);
- do_item_remove(it);
- it = NULL;
- }
- }
- int was_found = 0;
-
- if (settings.verbose > 2) {
- if (it == NULL) {
- fprintf(stderr, "> NOT FOUND %s", key);
- } else {
- fprintf(stderr, "> FOUND KEY %s", ITEM_key(it));
- was_found++;
- }
- }
-
- if (it != NULL) {
-
- if (settings.oldest_live != 0 && settings.oldest_live <= current_time &&
- it->time <= settings.oldest_live) {
- do_item_unlink(it, hv);
- do_item_remove(it);
- it = NULL;
- if (was_found) {
- fprintf(stderr, " -nuked by flush");
- }
-
- } else if (it->exptime != 0 && it->exptime <= current_time) {
- do_item_unlink(it, hv);
- do_item_remove(it);
- it = NULL;
- if (was_found) {
- fprintf(stderr, " -nuked by expire");
- }
- } else {
- it->it_flags |= ITEM_FETCHED;
- DEBUG_REFCNT(it, '+');
- }
- }
-
- if (settings.verbose > 2)
- fprintf(stderr, "\n");
-
- return it;
- }
-
-
- void do_item_remove(item *it) {
- MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
- assert((it->it_flags & ITEM_SLABBED) == 0);
-
- if (refcount_decr(&it->refcount) == 0) {
- item_free(it);
- }
- }
-
- void item_free(item *it) {
- size_t ntotal = ITEM_ntotal(it);
- unsigned int clsid;
- assert((it->it_flags & ITEM_LINKED) == 0);
- assert(it != heads[it->slabs_clsid]);
- assert(it != tails[it->slabs_clsid]);
- assert(it->refcount == 0);
-
-
- clsid = it->slabs_clsid;
- it->slabs_clsid = 0;
- DEBUG_REFCNT(it, 'F');
- slabs_free(it, ntotal, clsid);
- }
-
- void slabs_free(void *ptr, size_t size, unsigned int id) {
- pthread_mutex_lock(&slabs_lock);
- do_slabs_free(ptr, size, id);
- pthread_mutex_unlock(&slabs_lock);
- }
-
- static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
- slabclass_t *p;
- item *it;
-
- assert(((item *)ptr)->slabs_clsid == 0);
- assert(id >= POWER_SMALLEST && id <= power_largest);
- if (id < POWER_SMALLEST || id > power_largest)
- return;
-
- MEMCACHED_SLABS_FREE(size, id, ptr);
- p = &slabclass[id];
-
- it = (item *)ptr;
- it->it_flags |= ITEM_SLABBED;
- it->prev = 0;
- it->next = p->slots;
- if (it->next) it->next->prev = it;
- p->slots = it;
-
- p->sl_curr++;
- p->requested -= size;
- return;
- }
-
- void do_item_unlink(item *it, const uint32_t hv) {
- MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
- mutex_lock(&cache_lock);
- if ((it->it_flags & ITEM_LINKED) != 0) {
- it->it_flags &= ~ITEM_LINKED;
- STATS_LOCK();
- stats.curr_bytes -= ITEM_ntotal(it);
- stats.curr_items -= 1;
- STATS_UNLOCK();
- assoc_delete(ITEM_key(it), it->nkey, hv);
- item_unlink_q(it);
- do_item_remove(it);
- }
- mutex_unlock(&cache_lock);
- }
Memcached的get操作在读取数据时,会判断数据的有效性,使得不用额外去处理过期数据,get操作牵涉到Slab结构,Hash表,LRU队列的更新,我们后面专门分析这些的变更,这里暂不分析。
Memcached源码分析之set操作
之前分析了Memcached的get操作,下面分析set操作的流程。
-
- enum store_item_type store_item(item *item, int comm, conn* c) {
- enum store_item_type ret;
- uint32_t hv;
-
- hv = hash(ITEM_key(item), item->nkey, 0);
- item_lock(hv);
- ret = do_store_item(item, comm, c, hv);
- item_unlock(hv);
- return ret;
- }
-
- enum store_item_type do_store_item(item *it, int comm, conn *c,const uint32_t hv)
- {
- char *key = ITEM_key(it);
- item *old_it = do_item_get(key, it->nkey, hv);
- enum store_item_type stored = NOT_STORED;
-
- item *new_it = NULL;
- int flags;
-
- if (old_it != NULL && comm == NREAD_ADD)
- {
- do_item_update(old_it);
- }
- else if (!old_it
- && (comm == NREAD_REPLACE || comm == NREAD_APPEND
- || comm == NREAD_PREPEND))
- {
-
- }
- else if (comm == NREAD_CAS)
- {
- if (old_it == NULL)
- {
-
- stored = NOT_FOUND;
- pthread_mutex_lock(&c->thread->stats.mutex);
- c->thread->stats.cas_misses++;
- pthread_mutex_unlock(&c->thread->stats.mutex);
- }
- else if (ITEM_get_cas(it) == ITEM_get_cas(old_it))
- {
- pthread_mutex_lock(&c->thread->stats.mutex);
- c->thread->stats.slab_stats[old_it->slabs_clsid].cas_hits++;
- pthread_mutex_unlock(&c->thread->stats.mutex);
-
- item_replace(old_it, it, hv);
- stored = STORED;
- }
- else
- {
- pthread_mutex_lock(&c->thread->stats.mutex);
- c->thread->stats.slab_stats[old_it->slabs_clsid].cas_badval++;
- pthread_mutex_unlock(&c->thread->stats.mutex);
-
- if (settings.verbose > 1)
- {
- fprintf(stderr, "CAS: failure: expected %llu, got %llu\n",
- (unsigned long long) ITEM_get_cas(old_it),
- (unsigned long long) ITEM_get_cas(it));
- }
- stored = EXISTS;
- }
- }
- else
- {
- if (comm == NREAD_APPEND || comm == NREAD_PREPEND)
- {
-
- if (ITEM_get_cas(it) != 0)
- {
- if (ITEM_get_cas(it) != ITEM_get_cas(old_it))
- {
- stored = EXISTS;
- }
- }
-
- if (stored == NOT_STORED)
- {
- flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10);
-
- new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime,it->nbytes + old_it->nbytes - 2 , hv);
-
- if (new_it == NULL)
- {
-
- if (old_it != NULL)
- do_item_remove(old_it);
-
- return NOT_STORED;
- }
-
- if (comm == NREAD_APPEND)
- {
- memcpy(ITEM_data(new_it), ITEM_data(old_it),old_it->nbytes);
- memcpy(ITEM_data(new_it) + old_it->nbytes - 2,ITEM_data(it), it->nbytes);
- }
- else
- {
-
- memcpy(ITEM_data(new_it), ITEM_data(it), it->nbytes);
- memcpy(ITEM_data(new_it) + it->nbytes - 2 ,ITEM_data(old_it), old_it->nbytes);
- }
-
- it = new_it;
- }
- }
-
- if (stored == NOT_STORED)
- {
- if (old_it != NULL)
- item_replace(old_it, it, hv);
- else
- do_item_link(it, hv);
-
- c->cas = ITEM_get_cas(it);
-
- stored = STORED;
- }
- }
-
- if (old_it != NULL)
- do_item_remove(old_it);
- if (new_it != NULL)
- do_item_remove(new_it);
-
- if (stored == STORED)
- {
- c->cas = ITEM_get_cas(it);
- }
-
- return stored;
- }
-
- void do_item_update(item *it) {
- MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
- if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
- assert((it->it_flags & ITEM_SLABBED) == 0);
-
- mutex_lock(&cache_lock);
- if ((it->it_flags & ITEM_LINKED) != 0) {
- item_unlink_q(it);
- it->time = current_time;
- item_link_q(it);
- }
- mutex_unlock(&cache_lock);
- }
- }
-
- int do_item_replace(item *it, item *new_it, const uint32_t hv) {
- MEMCACHED_ITEM_REPLACE(ITEM_key(it), it->nkey, it->nbytes,
- ITEM_key(new_it), new_it->nkey, new_it->nbytes);
- assert((it->it_flags & ITEM_SLABBED) == 0);
-
- do_item_unlink(it, hv);
- return do_item_link(new_it, hv);
- }
有些item的操作已经在get操作中有分析,我们此处不做分析,我们下一篇分析下Memcached内部如何选择合适的空间来存放item.
上文来自:http://blog.csdn.net/lcli2009?viewmode=contents