0%

Redis源码剖析之字典

以下涉及到的源码均为redis5.0-rc3版本的代码【点击查看官方源码

Redis字典

字典是一个key-value形式的数据结构,使用过java中map结构的都应该了解,因为其也是字典;而了解哈希表的也应该不会陌生,因为字典的底层就是哈希表。而redis本身就是一个key-value形式的nosql数据库,足以见得字典结构在redis中的地位。

数据结构

字典的定义是在redis源码中的dict.h头文件中,其底层是由哈希表构成。

哈希表

首先来看一下哈希表的结构,在dict.h头文件中是这样定义的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
//哈希表的节点entry
typedef struct dictEntry {
void *key; //键
union { //值
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next; //next指针
} dictEntry;

/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
typedef struct dictht { //哈希表
dictEntry **table;
unsigned long size;
unsigned long sizemask;
unsigned long used;
} dictht;

哈希表由如下内容构成:

  • table:哈希表节点数组(指针型)。而节点dictEntry又由key,value及next指针构成;其中value也明确定义了可以是指针、无符号和有符号64位整型、double类型;next指针则是用于解决哈希键的冲突问题。
  • size:哈希节点数组能容纳的节点个数。
  • sizemask:值为size-1,是用于计算索引的掩码。
  • used:节点数组已有节点的个数。

通常,图更能有助于理解,如下所示:
redis-2-1

字典

在dict.h头文件中定义如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
//特定函数
typedef struct dictType {
//计算哈希值
uint64_t (*hashFunction)(const void *key);
//复制键
void *(*keyDup)(void *privdata, const void *key);
//复制值
void *(*valDup)(void *privdata, const void *obj);
//比较键
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
//销毁键
void (*keyDestructor)(void *privdata, void *key);
//销毁值
void (*valDestructor)(void *privdata, void *obj);
} dictType;

typedef struct dict {
dictType *type;
void *privdata;
//哈希表
dictht ht[2];
//用于标记是否正在进行rehash操作
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
//正在run的迭代器数量
unsigned long iterators; /* number of iterators currently running */
} dict;

/* If safe is set to 1 this is a safe iterator, that means, you can call
* dictAdd, dictFind, and other functions against the dictionary even while
* iterating. Otherwise it is a non safe iterator, and only dictNext()
* should be called while iterating. */
typedef struct dictIterator {
dict *d;
long index;
int table, safe;
dictEntry *entry, *nextEntry;
/* unsafe iterator fingerprint for misuse detection. */
long long fingerprint;
} dictIterator;

其中,明确标定了需要两个hash表,这是为什么呢?从哈希表结构的定义的代码注释(Every dictionary has two of this as we implement incremental rehashing, for the old to the new table.)可知,一个用于真正存值,另一个则用于在rehash的时候使用。
字典的示意图如下:
redis-2-2

在dict.h头文件中还有这样一行代码,其定义了字典中哈希表hash节点的初始个数为4。

1
2
/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE 4

从字典结构的源码中还能看见一个dictIterator结构,这便是用于遍历字典的迭代器。

关键点

哈希冲突

哈希表插入元素是通过利用key的hash值来计算索引位置,所以会存在冲突情况,而解决冲突则是利用hash节点结构中的next指针。当要插入一个新值的时候,比如entry(key,value),其插入过程如下图所示:
redis-2-3
哈希冲突的解决过程代码,再后面的操作函数-添加元素中可以详见。

空间扩容与释放

空间的扩容和释放过程其实就是rehash过程。在rehash过程正在进行时,会将rehashidx值设置为-1,重新计算key的hash值,然后利用字典中专门为rehash操作而设的一张hash表进行周转迁移数据,具体详见操作函数-rehash操作
而rehash的过程中,删除、更改和查询元素等操作也是要同时遍历两张hash表的,因为可能你想要的那个元素已被迁移到另一张表中。对于删除元素操作可见操作函数-删除元素中的相应代码。

操作函数

为了理解一些点,下面将列出部分函数,想知道更多操作的内部细节可浏览源码。此处先给出头文件中宏定义的两个码,后续函数中都会用到。

1
2
#define DICT_OK 0
#define DICT_ERR 1

创建字典

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/* Create a new hash table */
dict *dictCreate(dictType *type,
void *privDataPtr)
{
//创建一个字典指针,分配固定大小内存
dict *d = zmalloc(sizeof(*d));
//初始化
_dictInit(d,type,privDataPtr);
return d;
}

/* Initialize the hash table */
int _dictInit(dict *d, dictType *type,
void *privDataPtr)
{
_dictReset(&d->ht[0]);
_dictReset(&d->ht[1]);
d->type = type;
d->privdata = privDataPtr;
d->rehashidx = -1;
d->iterators = 0;
//DICT_OK为宏定义的码
return DICT_OK;
}

添加元素

字典的添加元素过程是先获取key的hash值,然后再通过掩码计算索引(hash & d->ht[table].sizemask),最后锁定索引的位置完成元素的添加。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
dictEntry *entry = dictAddRaw(d,key,NULL);

if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);
return DICT_OK;
}

/* Low level add or find:
* This function adds the entry but instead of setting a value returns the
* dictEntry structure to the user, that will make sure to fill the value
* field as he wishes.
*
* This function is also directly exposed to the user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
* entry = dictAddRaw(dict,mykey,NULL);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
* If key already exists NULL is returned, and "*existing" is populated
* with the existing entry if existing is not NULL.
*
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
long index;
dictEntry *entry;
dictht *ht;

if (dictIsRehashing(d)) _dictRehashStep(d);

/* Get the index of the new element, or -1 if
* the element already exists. */
if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
计算索引值,如果元素已存在则return
return NULL;

/* Allocate the memory and store the new entry.
* Insert the element in top, with the assumption that in a database
* system it is more likely that recently added entries are accessed
* more frequently. */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
//元素的加入总是加在索引处的第一个位置,这一步在解决hash冲突时将新加入的entry->next 指向索引上原有的第一个entry
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;

/* Set the hash entry fields. */
dictSetKey(d, entry, key);
return entry;
}
/* Returns the index of a free slot that can be populated with
* a hash entry for the given 'key'.
* If the key already exists, -1 is returned
* and the optional output parameter may be filled.
*
* Note that if we are in the process of rehashing the hash table, the
* index is always returned in the context of the second (new) hash table. */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
unsigned long idx, table;
dictEntry *he;
if (existing) *existing = NULL;

/* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
for (table = 0; table <= 1; table++) {
//用hash值与掩码进行与操作得出索引值
idx = hash & d->ht[table].sizemask;
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
if (existing) *existing = he;
//已存在返回-1
return -1;
}
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
//不存在,返回索引位置
return idx;
}

删除元素

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/* Remove an element, returning DICT_OK on success or DICT_ERR if the
* element was not found. */
int dictDelete(dict *ht, const void *key) {
return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}

/* Search and remove an element. This is an helper function for
* dictDelete() and dictUnlink(), please check the top comment
* of those functions. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
uint64_t h, idx;
dictEntry *he, *prevHe;
int table;

if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);

//遍历两个hash table并删除给定值,为了避免rehash过程中,用于rehash哈希表存储了要删除的值
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
prevHe = NULL;
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
if (prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next;
if (!nofree) {
dictFreeKey(d, he);
dictFreeVal(d, he);
zfree(he);
}
d->ht[table].used--;
return he;
}
prevHe = he;
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return NULL; /* not found */
}

rehash操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
*
* Note that a rehashing step consists in moving a bucket (that may have more
* than one key as we use chaining) from the old to the new hash table, however
* since part of the hash table may be composed of empty spaces, it is not
* guaranteed that this function will rehash even a single bucket, since it
* will visit at max N*10 empty buckets in total, otherwise the amount of
* work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;

while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;

/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashidx);
while(d->ht[0].table[d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
//迁移数据
while(de) {
uint64_t h;

nextde = de->next;
/* Get the index in the new hash table */
//计算新索引
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
d->ht[0].used--;
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}

/* Check if we already rehashed the whole table... */
if (d->ht[0].used == 0) {
//如果整个rehash操作已完成,则将h[1]和h[0]互换位置
zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
return 0;
}

/* More to rehash... */
return 1;