ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

dpdk-rte_mbuf数据结构学习

2021-07-11 09:55:35  阅读:422  来源: 互联网

标签:rte data mbuf len type uint32 dpdk


搞网络不知道dpdk。。。不合适。。。

搞dpdk不知道rte_mbuf。。。不合适。。。

所以,搞搞搞。。。

上源码!!!

//关于dpdk rte_mbuf数据结构的学习

/* define a set of marker types that can be used to refer to set points in the
 * mbuf */
/* Each marker type is an array with a [0] bound, so a struct member declared
 * with one of these types names a position in the layout rather than a data
 * field. A zero array bound is a compiler extension, not standard C, which is
 * why every typedef is preceded by __extension__. */
__extension__
typedef void    *MARKER[0];   /**< generic marker for a point in a structure */
__extension__
typedef uint8_t  MARKER8[0];  /**< generic marker with 1B alignment */
__extension__
typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
                               * with a single assignment */


/**
 * The generic rte_mbuf, containing a packet mbuf.
 *
 * The members of type MARKER / MARKER64 (cacheline0, rearm_data,
 * rx_descriptor_fields1, cacheline1) are zero-length arrays: they name a
 * position in the layout and carry no data of their own.
 */
struct rte_mbuf {
	MARKER cacheline0;        /**< Marks the start of the first cache line. */

	void *buf_addr;           /**< Virtual address of segment buffer. */
	/**
	 * Physical address of segment buffer.
	 * Force alignment to 8-bytes, so as to ensure we have the exact
	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
	 * working on vector drivers easier.
	 */
	RTE_STD_C11
	union {
		rte_iova_t buf_iova;
		rte_iova_t buf_physaddr; /**< deprecated */
	} __rte_aligned(sizeof(rte_iova_t));

	/* next 8 bytes are initialised on RX descriptor rearm */
	MARKER64 rearm_data;
	uint16_t data_off;        /**< Offset of the data start from buf_addr. */

	/**
	 * Reference counter. Its size should at least equal to the size
	 * of port field (16 bits), to support zero-copy broadcast.
	 * It should only be accessed using the following functions:
	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
	 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
	 * config option.
	 */
	RTE_STD_C11
	union {
		rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
		uint16_t refcnt;              /**< Non-atomically accessed refcnt */
	};
	uint16_t nb_segs;         /**< Number of segments. */

	/** Input port (16 bits to support more than 256 virtual ports). */
	uint16_t port;

	uint64_t ol_flags;        /**< Offload features. */

	/* remaining bytes are set on RX when pulling packet from descriptor */
	MARKER rx_descriptor_fields1;

	/*
	 * The packet type, which is the combination of outer/inner L2, L3, L4
	 * and tunnel types. The packet_type is about data really present in the
	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
	 * vlan is stripped from the data.
	 */
	RTE_STD_C11
	union {
		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
		struct {
			uint32_t l2_type:4; /**< (Outer) L2 type. */
			uint32_t l3_type:4; /**< (Outer) L3 type. */
			uint32_t l4_type:4; /**< (Outer) L4 type. */
			uint32_t tun_type:4; /**< Tunnel type. */
			RTE_STD_C11
			union {
				uint8_t inner_esp_next_proto;
				/**< ESP next protocol type, valid if
				 * RTE_PTYPE_TUNNEL_ESP tunnel type is set
				 * on both Tx and Rx.
				 */
				__extension__
				struct {
					uint8_t inner_l2_type:4;
					/**< Inner L2 type. */
					uint8_t inner_l3_type:4;
					/**< Inner L3 type. */
				};
			};
			uint32_t inner_l4_type:4; /**< Inner L4 type. */
		};
	};

	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
	uint16_t data_len;        /**< Amount of data in segment buffer. */
	/** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
	uint16_t vlan_tci;

	union {
		uint32_t rss;     /**< RSS hash result if RSS enabled */
		struct {
			RTE_STD_C11
			union {
				struct {
					uint16_t hash;
					uint16_t id;
				};
				uint32_t lo;
				/**< Second 4 flexible bytes */
			};
			uint32_t hi;
			/**< First 4 flexible bytes or FD ID, dependent on
			     PKT_RX_FDIR_* flag in ol_flags. */
		} fdir;           /**< Filter identifier if FDIR enabled */
		struct {
			uint32_t lo;
			uint32_t hi;
		} sched;          /**< Hierarchical scheduler */
		uint32_t usr;	  /**< User defined tags. See rte_distributor_process() */
	} hash;                   /**< hash information */

	/** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
	uint16_t vlan_tci_outer;

	uint16_t buf_len;         /**< Length of segment buffer. */

	/** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference
	 * are not normalized but are always the same for a given port.
	 */
	uint64_t timestamp;

	/* second cache line - fields only used in slow path or on TX */
	MARKER cacheline1 __rte_cache_min_aligned;

	RTE_STD_C11
	union {
		void *userdata;   /**< Can be used for external metadata */
		uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
	};

	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
	struct rte_mbuf *next;    /**< Next segment of scattered packet. */

	/* fields to support TX offloads */
	RTE_STD_C11
	union {
		uint64_t tx_offload;       /**< combined for easy fetch */
		__extension__
		struct {
			uint64_t l2_len:7;
			/**< L2 (MAC) Header Length for non-tunneling pkt.
			 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
			 */
			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
			uint64_t tso_segsz:16; /**< TCP TSO segment size */

			/* fields for TX offloading of tunnels */
			uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */
			uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */

			/* uint64_t unused:8; */
		};
	};

	/** Size of the application private data. In case of an indirect
	 * mbuf, it stores the direct mbuf private data size. */
	uint16_t priv_size;

	/** Timesync flags for use with IEEE1588. */
	uint16_t timesync;

	/** Sequence number. See also rte_reorder_insert(). */
	uint32_t seqn;

	/* The definition must be closed with ';'. The cache-line alignment
	 * attribute matches the upstream DPDK declaration of this struct. */
} __rte_cache_aligned;

 

好家伙,果然mbuf,大名鼎鼎。下面分别对每个字段进行学习解释。

 

下面按照出现顺序对每个字段进行解释。

MARKER cacheline0;

typedef void    *MARKER[0];   /**< generic marker for a point in a structure */

查看typedef,发现这是一个长度为0的数组(GNU C的零长数组扩展,和C99的柔性数组成员类似,但并不是同一个东西)。长度为0,所以它在结构体中不占用内存,只是一个标记。MARKER嘛。

 

void *buf_addr;           /**< Virtual address of segment buffer. */

有图就容易解释了,一些指针、成员或函数结果的内容在下表中列出,mbuf指针简写为m

m 首部,即mbuf结构体
m->buf_addr headroom起始地址
m->data_off data起始地址相对于buf_addr的偏移
m->buf_len mbuf和priv之后内存的长度,包含headroom
m->pkt_len 整个mbuf链的data总长度
m->data_len 实际data的长度
m->buf_addr+m->data_off 实际data的起始地址
rte_pktmbuf_mtod(m) 同上
rte_pktmbuf_data_len(m) 同m->data_len
rte_pktmbuf_pkt_len 同m->pkt_len
rte_pktmbuf_data_room_size 同m->buf_len
rte_pktmbuf_headroom headroom长度
rte_pktmbuf_tailroom 尾部剩余空间长度

综合图片解释以及上述表格的备注。这里buf_addr就是rte_mbuf结构体尾部,headroom起始地址。

/**
	 * Physical address of segment buffer.
	 * Force alignment to 8-bytes, so as to ensure we have the exact
	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
	 * working on vector drivers easier.
	 */
	RTE_STD_C11
	union {
		rte_iova_t buf_iova;
		rte_iova_t buf_physaddr; /**< deprecated */
	} __rte_aligned(sizeof(rte_iova_t));

段缓冲区的物理地址。 强制8字节对齐,保证在32位和64位有相同的cacheline0。这块暂时无需关注。

/* next 8 bytes are initialised on RX descriptor rearm */
	MARKER64 rearm_data;

接下来的 8 个字节在 RX 描述符重装时初始化 。

uint16_t data_off;

data起始地址相对于buf_addr的偏移。要获取data的位置,m->buf_addr + m->data_off ,就是对应的data的实际指针。一般中间间隔是一个headroom的大小。

 

/**
	 * Reference counter. Its size should at least equal to the size
	 * of port field (16 bits), to support zero-copy broadcast.
	 * It should only be accessed using the following functions:
	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
	 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
	 * config option.
	 */
	RTE_STD_C11
	union {
		rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
		uint16_t refcnt;              /**< Non-atomically accessed refcnt */
	};

引用计数。这里用union提供了原子访问和非原子访问2种形式。计数器的位宽至少要等于端口字段的位宽(16 bits),用来支持零拷贝广播:同一个mbuf不拷贝而被广播到多个端口时,每个端口都要持有一次引用,所以引用计数的取值范围不能小于端口号的取值范围。

 

uint16_t nb_segs;         /**< Number of segments. */

段(segment)数,即组成该报文的mbuf链中mbuf的个数。注意这里的segment不是IP分片。

 

	/** Input port (16 bits to support more than 256 virtual ports). */
	uint16_t port;

入接口id号。

 

	uint64_t ol_flags;        /**< Offload features. */

offload特性标记。

offload特性,主要是指将原本在协议栈中进行的IP分片、TCP分段、重组、checksum校验等操作,转移到网卡硬件中进行,降低系统CPU的消耗,提高处理性能。

 

	/* remaining bytes are set on RX when pulling packet from descriptor */
	MARKER rx_descriptor_fields1;

其余字节是在接收(RX)方向从描述符中取出数据包时设置的。这里同样只是标记使用,MARKER。。。

 

	/*
	 * The packet type, which is the combination of outer/inner L2, L3, L4
	 * and tunnel types. The packet_type is about data really present in the
	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
	 * vlan is stripped from the data.
	 */
	/*
	 * The anonymous struct below shares storage with packet_type and
	 * splits it into named sub-fields: 4+4+4+4 bits of outer L2/L3/L4 and
	 * tunnel type, 8 bits of inner L2/L3 type (or ESP next protocol), and
	 * 4 bits of inner L4 type.
	 */
	RTE_STD_C11
	union {
		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
		struct {
			uint32_t l2_type:4; /**< (Outer) L2 type. */
			uint32_t l3_type:4; /**< (Outer) L3 type. */
			uint32_t l4_type:4; /**< (Outer) L4 type. */
			uint32_t tun_type:4; /**< Tunnel type. */
			RTE_STD_C11
			union {
				uint8_t inner_esp_next_proto;
				/**< ESP next protocol type, valid if
				 * RTE_PTYPE_TUNNEL_ESP tunnel type is set
				 * on both Tx and Rx.
				 */
				__extension__
				struct {
					uint8_t inner_l2_type:4;
					/**< Inner L2 type. */
					uint8_t inner_l3_type:4;
					/**< Inner L3 type. */
				};
			};
			uint32_t inner_l4_type:4; /**< Inner L4 type. */
		};
	};

 

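此数据结构比较清晰,无需多余解释。有一个疑问,这里的inner && outer具体是什么呢?从注释中的"outer/inner L2, L3, L4 and tunnel types"来看,这是针对隧道报文(例如VXLAN、GRE)而言的:outer指外层封装的报文头,inner指被隧道封装在里面的原始报文头。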

 

 

	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
	uint16_t data_len;        /**< Amount of data in segment buffer. */

pkt_len,包括所有分片的长度。

data_len,当前的数据长度。如果没有分片,pkt_len与data_len数值应该是相同的。也就是pkt_len >= data_len.

 

	/** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
	uint16_t vlan_tci;

只有开启了PKT_RX_VLAN_STRIPPED标记,此字段才是有效的。vlan时使用,学习vlan时,需要关注此字段

 

	union {
		uint32_t rss;     /**< RSS hash result if RSS enabled */
		struct {
			RTE_STD_C11
			union {
				struct {
					uint16_t hash;
					uint16_t id;
				};
				uint32_t lo;
				/**< Second 4 flexible bytes */
			};
			uint32_t hi;
			/**< First 4 flexible bytes or FD ID, dependent on
			     PKT_RX_FDIR_* flag in ol_flags. */
		} fdir;           /**< Filter identifier if FDIR enabled */
		struct {
			uint32_t lo;
			uint32_t hi;
		} sched;          /**< Hierarchical scheduler */
		uint32_t usr;	  /**< User defined tags. See rte_distributor_process() */
	} hash;                   /**< hash information */

哈希数据。这里是一个union。当RSS开启时,对应rss字段是哈希结果。学习RSS时,关注一下

 

	/** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
	uint16_t vlan_tci_outer;

 只有开启了QINQ剥离时,此字段有效。外部vlan相关。

 

	uint16_t buf_len;         /**< Length of segment buffer. */

mbuf和priv之后内存的长度,包含headroom。

	/** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference
	 * are not normalized but are always the same for a given port.
	 */
	uint64_t timestamp;

时间戳。PKT_RX_TIMESTAMP开启时,此字段有效。单位和时间参考未标准化,但对于给定端口始终相同。

	/* second cache line - fields only used in slow path or on TX */
	MARKER cacheline1 __rte_cache_min_aligned;

第二个cacheline,这部分字段仅用在慢速路径(slow path)或者发包(TX)流程中。

 

	RTE_STD_C11
	union {
		void *userdata;   /**< Can be used for external metadata */
		uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
	};
//#define RTE_STD_C11 __extension__

__extension__是GCC的关键字(不是结构体字段),用于抑制使用扩展语法(例如在非C11模式下使用匿名union/struct)时产生的编译告警。

这里是一个union,

userdata指针可以用来存放额外的元数据。

udata64,可以存放8字节的用户数据。

 

	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */

标识本mbuf是从哪个rte_mempool池子中申请到的。也就是该mbuf是哪个rte_mempool池子的。

 

	struct rte_mbuf *next;    /**< Next segment of scattered packet. */

对于由多个段组成的报文(scattered packet),指向mbuf链中的下一个段。

 

	/* fields to support TX offloads */
	/* The bit-fields in the anonymous struct below share storage with
	 * tx_offload. */
	RTE_STD_C11
	union {
		uint64_t tx_offload;       /**< combined for easy fetch */
		/* tx_offload combines all of the lengths below into one 64-bit
		 * value so they can be fetched together */
		__extension__
		struct {
			uint64_t l2_len:7;
			/**< L2 (MAC) Header Length for non-tunneling pkt.
			 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
			 */
			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
			uint64_t tso_segsz:16; /**< TCP TSO segment size */
			/* TSO (TCP Segmentation Offload) uses a small amount of NIC
			 processing to reduce the CPU load of sending packets; it
			 needs support from the NIC hardware and the driver. */

			/* fields for TX offloading of tunnels */
			uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */
			uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */

			/* uint64_t unused:8; */
		};
	};

支持硬件发包卸载的字段内容。内部为一个union。其中tx_offload字段是为了容易获取搞出来的。

 

	/** Size of the application private data. In case of an indirect
	 * mbuf, it stores the direct mbuf private data size. */
	uint16_t priv_size;

应用程序私有数据的大小。 

在indirect mbuf 的情况下,它存储direct mbuf 私有数据大小。 关于direct mbuf与indirect mbuf的区别,参考链接

10. Mbuf Library — Data Plane Development Kit 21.08.0-rc1 documentation (dpdk.org)

 

	/** Timesync flags for use with IEEE1588. */
	/* IEEE 1588, also known as PTP (Precision Time Protocol), can reach
	 * sub-microsecond time synchronization accuracy; version 1 was
	 * published in 2002 and version 2 in 2008. */
	uint16_t timesync;

时间同步。参考IEEE1588。

IEEE 1588_百度百科 (baidu.com)

 

	/** Sequence number. See also rte_reorder_insert(). */
	uint32_t seqn;

序列号。这个是哪里用到呢?从注释看,rte_reorder_insert()(DPDK的reorder库)会用到它。

 

 

rte_mbuf的数据结构学习完毕。有一些遗留的问题,后续来完善。

标签:rte,data,mbuf,len,type,uint32,dpdk
来源: https://blog.51cto.com/qiaopeng688/3035432

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有