]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/dpdk/lib/librte_gro/gro_tcp4.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / dpdk / lib / librte_gro / gro_tcp4.h
CommitLineData
11fdf7f2
TL
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */
4
5#ifndef _GRO_TCP4_H_
6#define _GRO_TCP4_H_
7
8#include <rte_ip.h>
9#include <rte_tcp.h>
10
/* Sentinel array index; a flow whose start_index holds it is empty. */
#define INVALID_ARRAY_INDEX 0xffffffffUL
/* 1024 * 1024; NOTE(review): presumably the cap on table items - confirm in gro_tcp4.c */
#define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
13
/*
 * The maximum length of an IPv4 packet, which includes the length of the
 * L3 header, the L4 header and the data payload.
 */
#define MAX_IPV4_PKT_LENGTH UINT16_MAX
19
9f95a23c
TL
/* The maximum TCP header length, in bytes */
#define MAX_TCP_HLEN 60
/*
 * Evaluates to non-zero when len lies outside the closed range
 * [sizeof(struct tcp_hdr), MAX_TCP_HLEN].
 */
#define INVALID_TCP_HDRLEN(len) \
	(((len) < sizeof(struct tcp_hdr)) || ((len) > MAX_TCP_HLEN))
24
11fdf7f2
TL
25/* Header fields representing a TCP/IPv4 flow */
26struct tcp4_flow_key {
27 struct ether_addr eth_saddr;
28 struct ether_addr eth_daddr;
29 uint32_t ip_src_addr;
30 uint32_t ip_dst_addr;
31
32 uint32_t recv_ack;
33 uint16_t src_port;
34 uint16_t dst_port;
35};
36
37struct gro_tcp4_flow {
38 struct tcp4_flow_key key;
39 /*
40 * The index of the first packet in the flow.
41 * INVALID_ARRAY_INDEX indicates an empty flow.
42 */
43 uint32_t start_index;
44};
45
/* One stored (possibly already merged) packet of a flow. */
struct gro_tcp4_item {
	/*
	 * The first MBUF segment of the packet. If the value
	 * is NULL, it means the item is empty.
	 */
	struct rte_mbuf *firstseg;
	/* The last MBUF segment of the packet */
	struct rte_mbuf *lastseg;
	/*
	 * The time when the first packet is inserted into the table.
	 * This value won't be updated, even if the packet is merged
	 * with other packets.
	 */
	uint64_t start_time;
	/*
	 * next_pkt_idx is used to chain the packets that
	 * are in the same flow but can't be merged together
	 * (e.g. caused by packet reordering).
	 */
	uint32_t next_pkt_idx;
	/*
	 * TCP sequence number of the packet. Replaced by the new packet's
	 * sequence number when a packet is pre-pended.
	 */
	uint32_t sent_seq;
	/*
	 * IPv4 ID of the packet. Overwritten with the ID of the most
	 * recently merged packet, whether appended or pre-pended.
	 */
	uint16_t ip_id;
	/* The number of merged packets; incremented on every merge */
	uint16_t nb_merged;
	/* Indicate if IPv4 ID can be ignored */
	uint8_t is_atomic;
};
75
/*
 * TCP/IPv4 reassembly table structure: an item array and a flow array,
 * each with its current element count and its capacity.
 */
struct gro_tcp4_tbl {
	/* item array */
	struct gro_tcp4_item *items;
	/* flow array */
	struct gro_tcp4_flow *flows;
	/* current item number */
	uint32_t item_num;
	/* current flow number */
	uint32_t flow_num;
	/* item array size */
	uint32_t max_item_num;
	/* flow array size */
	uint32_t max_flow_num;
};
93
/**
 * This function creates a TCP/IPv4 reassembly table.
 *
 * @param socket_id
 *  Socket index for allocating the TCP/IPv4 reassembly table
 * @param max_flow_num
 *  The maximum number of flows in the TCP/IPv4 GRO table
 * @param max_item_per_flow
 *  The maximum number of packets per flow
 *
 * @return
 *  - Return the table pointer on success.
 *  - Return NULL on failure.
 */
void *gro_tcp4_tbl_create(uint16_t socket_id,
		uint16_t max_flow_num,
		uint16_t max_item_per_flow);
111
/**
 * This function destroys a TCP/IPv4 reassembly table.
 *
 * @param tbl
 *  Pointer to the TCP/IPv4 reassembly table.
 */
void gro_tcp4_tbl_destroy(void *tbl);
119
/**
 * This function merges a TCP/IPv4 packet. It doesn't process a packet
 * that has SYN, FIN, RST, PSH, CWR, ECE or URG set, or that has no
 * payload.
 *
 * This function doesn't check if the packet has correct checksums and
 * doesn't re-calculate checksums for the merged packet. Additionally,
 * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
 * when IP fragmentation is possible (i.e., DF==0). It returns the
 * packet, if the packet has invalid parameters (e.g. SYN bit is set)
 * or there is no available space in the table.
 *
 * @param pkt
 *  Packet to reassemble
 * @param tbl
 *  Pointer to the TCP/IPv4 reassembly table
 * @param start_time
 *  The time when the packet is inserted into the table
 *
 * @return
 *  - Return a positive value if the packet is merged.
 *  - Return zero if the packet isn't merged but stored in the table.
 *  - Return a negative value for invalid parameters or no available
 *    space in the table.
 */
int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
		struct gro_tcp4_tbl *tbl,
		uint64_t start_time);
148
/**
 * This function flushes timed-out packets from a TCP/IPv4 reassembly
 * table, without updating checksums.
 *
 * @param tbl
 *  TCP/IPv4 reassembly table pointer
 * @param flush_timestamp
 *  Flush packets which were inserted into the table before or at the
 *  flush_timestamp.
 * @param out
 *  Pointer array used to keep flushed packets
 * @param nb_out
 *  The number of elements in 'out'. It also determines the maximum
 *  number of packets that can be flushed.
 *
 * @return
 *  The number of flushed packets
 */
uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
		uint64_t flush_timestamp,
		struct rte_mbuf **out,
		uint16_t nb_out);
171
/**
 * This function returns the number of packets in a TCP/IPv4
 * reassembly table.
 *
 * @param tbl
 *  TCP/IPv4 reassembly table pointer
 *
 * @return
 *  The number of packets in the table
 */
uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
183
184/*
185 * Check if two TCP/IPv4 packets belong to the same flow.
186 */
187static inline int
188is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
189{
190 return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
191 is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
192 (k1.ip_src_addr == k2.ip_src_addr) &&
193 (k1.ip_dst_addr == k2.ip_dst_addr) &&
194 (k1.recv_ack == k2.recv_ack) &&
195 (k1.src_port == k2.src_port) &&
196 (k1.dst_port == k2.dst_port));
197}
198
199/*
200 * Merge two TCP/IPv4 packets without updating checksums.
201 * If cmp is larger than 0, append the new packet to the
202 * original packet. Otherwise, pre-pend the new packet to
203 * the original packet.
204 */
205static inline int
206merge_two_tcp4_packets(struct gro_tcp4_item *item,
207 struct rte_mbuf *pkt,
208 int cmp,
209 uint32_t sent_seq,
210 uint16_t ip_id,
211 uint16_t l2_offset)
212{
213 struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
214 uint16_t hdr_len, l2_len;
215
216 if (cmp > 0) {
217 pkt_head = item->firstseg;
218 pkt_tail = pkt;
219 } else {
220 pkt_head = pkt;
221 pkt_tail = item->firstseg;
222 }
223
224 /* check if the IPv4 packet length is greater than the max value */
225 hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
226 pkt_head->l4_len;
227 l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
228 if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
229 hdr_len > MAX_IPV4_PKT_LENGTH))
230 return 0;
231
232 /* remove the packet header for the tail packet */
233 rte_pktmbuf_adj(pkt_tail, hdr_len);
234
235 /* chain two packets together */
236 if (cmp > 0) {
237 item->lastseg->next = pkt;
238 item->lastseg = rte_pktmbuf_lastseg(pkt);
239 /* update IP ID to the larger value */
240 item->ip_id = ip_id;
241 } else {
242 lastseg = rte_pktmbuf_lastseg(pkt);
243 lastseg->next = item->firstseg;
244 item->firstseg = pkt;
245 /* update sent_seq to the smaller value */
246 item->sent_seq = sent_seq;
247 item->ip_id = ip_id;
248 }
249 item->nb_merged++;
250
251 /* update MBUF metadata for the merged packet */
252 pkt_head->nb_segs += pkt_tail->nb_segs;
253 pkt_head->pkt_len += pkt_tail->pkt_len;
254
255 return 1;
256}
257
258/*
259 * Check if two TCP/IPv4 packets are neighbors.
260 */
261static inline int
262check_seq_option(struct gro_tcp4_item *item,
263 struct tcp_hdr *tcph,
264 uint32_t sent_seq,
265 uint16_t ip_id,
266 uint16_t tcp_hl,
267 uint16_t tcp_dl,
268 uint16_t l2_offset,
269 uint8_t is_atomic)
270{
271 struct rte_mbuf *pkt_orig = item->firstseg;
272 struct ipv4_hdr *iph_orig;
273 struct tcp_hdr *tcph_orig;
274 uint16_t len, tcp_hl_orig;
275
276 iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
277 l2_offset + pkt_orig->l2_len);
278 tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
279 tcp_hl_orig = pkt_orig->l4_len;
280
281 /* Check if TCP option fields equal */
282 len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct tcp_hdr);
283 if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
284 (memcmp(tcph + 1, tcph_orig + 1,
285 len) != 0)))
286 return 0;
287
288 /* Don't merge packets whose DF bits are different */
289 if (unlikely(item->is_atomic ^ is_atomic))
290 return 0;
291
292 /* check if the two packets are neighbors */
293 len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
294 pkt_orig->l3_len - tcp_hl_orig;
295 if ((sent_seq == item->sent_seq + len) && (is_atomic ||
296 (ip_id == item->ip_id + 1)))
297 /* append the new packet */
298 return 1;
299 else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
300 (ip_id + item->nb_merged == item->ip_id)))
301 /* pre-pend the new packet */
302 return -1;
303
304 return 0;
305}
306#endif