【OVS2.5.0源码分析】datapath之流表查询

流表查询是datapath报文处理过程中最为关键的一个步骤:一个skb报文进入datapath之后,如何才能快速地匹配到流表?本篇分析ovs是如何查询流表的。

1、ovs_flow_tbl_lookup_stats函数

/*
 * ovs_flow_tbl_lookup_stats - find the flow matching 'key', using the
 * per-CPU mask cache to pick the mask that is tried first.
 *
 * @tbl:        flow table to search.
 * @key:        unmasked key of the packet (the article notes it is produced
 *              by ovs_flow_key_extract() from the skb).
 * @skb_hash:   hash value carried by the skb; 0 means "no hash available".
 * @n_mask_hit: output; reset to 0 here and then passed down to flow_lookup()
 *              so the number of masks probed can be reported.
 *
 * Returns the matching flow, or NULL when no flow matches.
 *
 * tbl->mask_array and tbl->ti are read with rcu_dereference(), so the caller
 * is presumably inside an RCU read-side critical section.
 */
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
					  const struct sw_flow_key *key,
					  u32 skb_hash,
					  u32 *n_mask_hit)
{
	struct mask_array *ma = rcu_dereference(tbl->mask_array);
	struct table_instance *ti = rcu_dereference(tbl->ti);	/* current table instance */
	struct mask_cache_entry *entries, *ce;
	struct sw_flow *flow;
	u32 hash;
	int seg;

	*n_mask_hit = 0;
	if (unlikely(!skb_hash)) {
		/* No packet hash: the cache cannot be used. Start from mask
		 * index 0 and let flow_lookup() walk all the masks.
		 */
		u32 mask_index = 0;

		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
	}

	/* Pre and post recirculation flows usually have the same skb_hash
	 * value. To avoid hash collisions, rehash the 'skb_hash' with
	 * 'recirc_id'.  */
	if (key->recirc_id)
		skb_hash = jhash_1word(skb_hash, key->recirc_id);

	ce = NULL;
	hash = skb_hash;
	entries = this_cpu_ptr(tbl->mask_cache);

	/* Find the cache entry 'ce' to operate on.
	 *
	 * The 32-bit hash is consumed in MC_HASH_SEGS segments of
	 * MC_HASH_SHIFT bits each (4 segments of 8 bits -- bits, not bytes --
	 * per the article; the constants are defined outside this excerpt).
	 * Each segment is used as an index into the per-CPU cache.
	 */
	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
		int index = hash & (MC_HASH_ENTRIES - 1);
		struct mask_cache_entry *e;

		e = &entries[index];	/* cache has MC_HASH_ENTRIES slots (256 per the article) */
		if (e->skb_hash == skb_hash) {
			/* Cache hit: try the mask recorded in this entry
			 * first. If no flow is found at all, invalidate the
			 * entry by clearing its hash.
			 */
			flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
					   &e->mask_index);
			if (!flow)
				e->skb_hash = 0;
			return flow;
		}

		if (!ce || e->skb_hash < ce->skb_hash)
			ce = e;  /* A better replacement cache candidate. */

		hash >>= MC_HASH_SHIFT;
	}

	/* Cache miss, do full lookup. 'ce' becomes the new cache entry:
	 * flow_lookup() stores the index of the matching mask through
	 * &ce->mask_index, and the hash is recorded below, so the next packet
	 * with the same hash hits the cache directly.
	 */
	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
	if (flow)
		ce->skb_hash = skb_hash;

	return flow;
}
2、flow_lookup函数

</pre><pre name="code" class="cpp">static struct sw_flow *flow_lookup(struct flow_table *tbl,
				   struct table_instance *ti,
				   const struct mask_array *ma,
				   const struct sw_flow_key *key,
				   u32 *n_mask_hit,
				   u32 *index)
{
	struct sw_flow_mask *mask;
	struct sw_flow *flow;
	int i;

	if (*index < ma->max) {         //如果index的值小于mask的entry数量,说明index是有效值,基于该值获取sw_flow_mask值
		mask = rcu_dereference_ovsl(ma->masks[*index]);
		if (mask) {
			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
			if (flow)
				return flow;
		}
	}

	for (i = 0; i < ma->max; i++)  {

		if (i == *index)	//前面已查询过,所以跳过该mask
			continue;

		mask = rcu_dereference_ovsl(ma->masks[i]);
		if (!mask)
			continue;

		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
		if (flow) { /* Found */
			*index = i;		//更新index指向的值,下次可以直接命中;此处说明cache没有命中,下一次可以直接命中
			return flow;
		}
	}

	return NULL;
}
3、masked_flow_lookup函数

/*
 * masked_flow_lookup - look for a flow installed under 'mask' that matches
 * the packet key 'unmasked'.
 *
 * The key is masked, hashed over mask->range, and the resulting bucket of
 * 'ti' is walked. '*n_mask_hit' is incremented once per call, whether or not
 * a flow is found.
 *
 * Returns the matching flow, or NULL if the bucket holds no match.
 */
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
					  const struct sw_flow_key *unmasked,
					  const struct sw_flow_mask *mask,
					  u32 *n_mask_hit)
{
	struct sw_flow_key masked_key;
	struct hlist_head *bucket;
	struct sw_flow *candidate;
	u32 key_hash;

	/* Apply the mask (only within mask->range, since 'full' is false)
	 * so that wildcarded fields do not take part in the comparison.
	 */
	ovs_flow_mask_key(&masked_key, unmasked, false, mask);

	/* Hash the masked key over the mask's range and locate the bucket. */
	key_hash = flow_hash(&masked_key, &mask->range);
	bucket = find_bucket(ti, key_hash);

	(*n_mask_hit)++;

	/* A flow matches only if it uses this very mask, has the same hash,
	 * and its masked key compares equal within the mask's range.
	 */
	hlist_for_each_entry_rcu(candidate, bucket, flow_table.node[ti->node_ver]) {
		if (candidate->mask != mask)
			continue;
		if (candidate->flow_table.hash != key_hash)
			continue;
		if (flow_cmp_masked_key(candidate, &masked_key, &mask->range))
			return candidate;
	}

	return NULL;
}
ovs_flow_mask_key 函数

/*
 * ovs_flow_mask_key - compute dst = src & mask->key, one long at a time.
 *
 * @dst:  destination key.
 * @src:  source (unmasked) key.
 * @full: when true, the whole of 'dst' (sizeof *dst bytes from offset 0) is
 *        written; when false, only the bytes covered by mask->range are
 *        written and everything outside that range is left uninitialized.
 * @mask: mask supplying both the mask bits (mask->key) and the range.
 *
 * The partial form is an optimization for callers whose later operations on
 * 'dst' only touch contents inside mask->range.
 */
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
		       bool full, const struct sw_flow_mask *mask)
{
	const long *mask_word;
	const long *src_word;
	long *dst_word;
	int start;
	int len;
	int done;

	/* Pick the byte window to process. */
	if (full) {
		start = 0;
		len = sizeof(*dst);
	} else {
		start = mask->range.start;
		len = range_n_bytes(&mask->range);
	}

	mask_word = (const long *)((const u8 *)&mask->key + start);
	src_word = (const long *)((const u8 *)src + start);
	dst_word = (long *)((u8 *)dst + start);

	/* Target key = source key & mask, advancing sizeof(long) bytes per
	 * step until 'len' bytes have been covered.
	 */
	for (done = 0; done < len; done += sizeof(long)) {
		*dst_word = *src_word & *mask_word;
		dst_word++;
		src_word++;
		mask_word++;
	}
}
find_bucket 函数

/*
 * find_bucket - map a flow hash to its bucket list head in 'ti'.
 *
 * The hash is first mixed with the instance's hash_seed, then reduced to a
 * bucket index by masking with (n_buckets - 1), i.e. the low bits of the
 * seeded hash select the bucket (this presumes n_buckets is a power of two
 * -- confirm where the table instance is allocated).
 */
static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
{
	u32 seeded = jhash_1word(hash, ti->hash_seed);
	u32 slot = seeded & (ti->n_buckets - 1);

	return flex_array_get(ti->buckets, slot);
}

到此流表查找过程已经比较清晰了:tbl->mask_cache是用来加速报文处理的,同一条流的skb其hash值也相同,因此可以通过cache快速找到对应的mask对象,再用该mask计算masked key及其hash值,找到bucket后进行匹配。下图用来阐述skb在流表查询中依赖了哪些数据,以及通过哪些数据完成了flow的查找。

【OVS2.5.0源码分析】datapath之流表查询_第1张图片
PS: 一个框(橙色)如果有多个箭头输入,表示要获取该框的内容,依赖哪些信息。



你可能感兴趣的:(网络,openvswitch,ovs)