/*
 * QEMU TX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/crc32c.h"
#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "net/net.h"
#include "hw/pci/pci_device.h"
#include "net_tx_pkt.h"

enum {
    NET_TX_PKT_VHDR_FRAG = 0,
    NET_TX_PKT_L2HDR_FRAG,
    NET_TX_PKT_L3HDR_FRAG,
    NET_TX_PKT_PL_START_FRAG
};

/* TX packet private context */
struct NetTxPkt {
    struct virtio_net_hdr virt_hdr;

    struct iovec *raw;
    uint32_t raw_frags;
    uint32_t max_raw_frags;

    struct iovec *vec;

    struct {
        struct eth_header eth;
        struct vlan_header vlan[3];
    } l2_hdr;
    union {
        struct ip_header ip;
        struct ip6_header ip6;
        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
    } l3_hdr;

    uint32_t payload_len;

    uint32_t payload_frags;
    uint32_t max_payload_frags;

    uint16_t hdr_len;
    eth_pkt_types_e packet_type;
    uint8_t l4proto;
};

void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags)
{
    struct NetTxPkt *p = g_malloc0(sizeof *p);

    p->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG);

    p->raw = g_new(struct iovec, max_frags);

    p->max_payload_frags = max_frags;
    p->max_raw_frags = max_frags;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof p->virt_hdr;
    p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
    p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr;

    *pkt = p;
}
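
/*
 * Illustrative usage (a sketch inferred from the functions in this file;
 * names like max_frags, buf, len, nc, free_cb and ctx are placeholders):
 *
 *     struct NetTxPkt *pkt;
 *     net_tx_pkt_init(&pkt, max_frags);
 *     ...
 *     net_tx_pkt_add_raw_fragment(pkt, buf, len);   // once per guest buffer
 *     if (net_tx_pkt_parse(pkt)) {
 *         net_tx_pkt_build_vheader(pkt, tso, csum, gso_size);
 *         net_tx_pkt_send(pkt, nc);   // or net_tx_pkt_send_custom()
 *     }
 *     net_tx_pkt_reset(pkt, free_cb, ctx);   // release the raw fragments
 */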

void net_tx_pkt_uninit(struct NetTxPkt *pkt)
{
    if (pkt) {
        g_free(pkt->vec);
        g_free(pkt->raw);
        g_free(pkt);
    }
}

void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
{
    uint16_t csum;
    assert(pkt);

    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);

    pkt->l3_hdr.ip.ip_sum = 0;
    csum = net_raw_checksum(pkt->l3_hdr.octets,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
}
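
/*
 * For GSO packets, the function below pre-seeds the L4 checksum field with
 * the folded (not yet complemented) IP pseudo-header sum, so that whoever
 * completes the checksum later only needs to add the L4 header and payload.
 * This is the usual convention for VIRTIO_NET_HDR_F_NEEDS_CSUM offloads.
 */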
void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
{
    uint16_t csum;
    uint32_t cntr, cso;
    uint8_t gso_type;
    void *ip_hdr;

    assert(pkt);

    gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;

    if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
        ETH_MAX_IP_DGRAM_LEN) {
        return;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_UDP) {
        /* Calculate IP header checksum */
        net_tx_pkt_update_ip_hdr_checksum(pkt);

        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len,
                                            IP_PROTO_TCP, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else {
        return;
    }

    iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                 pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
}

bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt)
{
    uint32_t csum = 0;
    struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG;

    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum,
                     sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    csum = cpu_to_le32(iov_crc32c(0xffffffff, pl_start_frag,
                                  pkt->payload_frags));
    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum,
                     sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    return true;
}
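
/*
 * Note: offset 8 above is the checksum field of the SCTP common header (it
 * follows the 2-byte source port, 2-byte destination port and 4-byte
 * verification tag). Unlike TCP/UDP, SCTP uses CRC32C over the whole packet
 * and involves no pseudo header.
 */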

static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
{
    pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
}

static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
{
    struct iovec *l2_hdr, *l3_hdr;
    size_t bytes_read;
    size_t full_ip6hdr_len;
    uint16_t l3_proto;

    assert(pkt);

    l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
                            ETH_MAX_L2_HDR_LEN);
    if (bytes_read < sizeof(struct eth_header)) {
        l2_hdr->iov_len = 0;
        return false;
    }

    l2_hdr->iov_len = sizeof(struct eth_header);
    switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) {
    case ETH_P_VLAN:
        l2_hdr->iov_len += sizeof(struct vlan_header);
        break;
    case ETH_P_DVLAN:
        l2_hdr->iov_len += 2 * sizeof(struct vlan_header);
        break;
    }

    if (bytes_read < l2_hdr->iov_len) {
        l2_hdr->iov_len = 0;
        l3_hdr->iov_len = 0;
        pkt->packet_type = ETH_PKT_UCAST;
        return false;
    } else {
        l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN;
        l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
        pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
    }

    l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len);

    switch (l3_proto) {
    case ETH_P_IP:
        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, sizeof(struct ip_header));

        if (bytes_read < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);

        if (l3_hdr->iov_len < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = IP_HDR_GET_P(l3_hdr->iov_base);

        if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) {
            /* copy optional IPv4 header data if any */
            bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
                                    l2_hdr->iov_len + sizeof(struct ip_header),
                                    l3_hdr->iov_base + sizeof(struct ip_header),
                                    l3_hdr->iov_len - sizeof(struct ip_header));
            if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
                l3_hdr->iov_len = 0;
                return false;
            }
        }

        break;

    case ETH_P_IPV6:
    {
        eth_ip6_hdr_info hdrinfo;

        if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                &hdrinfo)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = hdrinfo.l4proto;
        full_ip6hdr_len = hdrinfo.full_hdr_len;

        if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) {
            l3_hdr->iov_len = 0;
            return false;
        }

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, full_ip6hdr_len);

        if (bytes_read < full_ip6hdr_len) {
            l3_hdr->iov_len = 0;
            return false;
        } else {
            l3_hdr->iov_len = full_ip6hdr_len;
        }
        break;
    }
    default:
        l3_hdr->iov_len = 0;
        break;
    }

    net_tx_pkt_calculate_hdr_len(pkt);
    return true;
}

static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt)
{
    pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;
    pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                  pkt->max_payload_frags,
                                  pkt->raw, pkt->raw_frags,
                                  pkt->hdr_len, pkt->payload_len);
}

bool net_tx_pkt_parse(struct NetTxPkt *pkt)
{
    if (net_tx_pkt_parse_headers(pkt)) {
        net_tx_pkt_rebuild_payload(pkt);
        return true;
    } else {
        return false;
    }
}

struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt)
{
    assert(pkt);
    return &pkt->virt_hdr;
}

static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt,
                                       bool tso_enable)
{
    uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
    uint16_t l3_proto;

    l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1,
                                pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len);

    if (!tso_enable) {
        goto func_exit;
    }

    rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                          pkt->l4proto);

func_exit:
    return rc;
}

bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
                              bool csum_enable, uint32_t gso_size)
{
    struct tcp_hdr l4hdr;
    size_t bytes_read;
    assert(pkt);

    /* csum has to be enabled if tso is. */
    assert(csum_enable || !tso_enable);

    pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable);

    switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
    case VIRTIO_NET_HDR_GSO_NONE:
        pkt->virt_hdr.hdr_len = 0;
        pkt->virt_hdr.gso_size = 0;
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        pkt->virt_hdr.gso_size = gso_size;
        pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        bytes_read = iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr));
        if (bytes_read < sizeof(l4hdr) ||
            l4hdr.th_off * sizeof(uint32_t) < sizeof(l4hdr)) {
            return false;
        }

        pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
        pkt->virt_hdr.gso_size = gso_size;
        break;

    default:
        g_assert_not_reached();
    }

    if (csum_enable) {
        switch (pkt->l4proto) {
        case IP_PROTO_TCP:
            if (pkt->payload_len < sizeof(struct tcp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
            break;
        case IP_PROTO_UDP:
            if (pkt->payload_len < sizeof(struct udp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
            break;
        default:
            break;
        }
    }

    return true;
}
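
/*
 * Worked example (illustrative): for TSO over a packet with a 14-byte
 * Ethernet header, a 20-byte IPv4 header and a 20-byte TCP header, the
 * virtio header ends up with hdr_len = 14 + 20 + 20 = 54, csum_start = 34,
 * csum_offset = 16 (offsetof(struct tcp_hdr, th_sum)) and gso_size = the
 * MSS requested by the caller.
 */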

void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
                                     uint16_t vlan, uint16_t vlan_ethtype)
{
    assert(pkt);

    eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
                           &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
                           vlan, vlan_ethtype);

    pkt->hdr_len += sizeof(struct vlan_header);
}

bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len)
{
    struct iovec *ventry;
    assert(pkt);

    if (pkt->raw_frags >= pkt->max_raw_frags) {
        return false;
    }

    ventry = &pkt->raw[pkt->raw_frags];
    ventry->iov_base = base;
    ventry->iov_len = len;
    pkt->raw_frags++;

    return true;
}

bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt)
{
    return pkt->raw_frags > 0;
}

eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->packet_type;
}

size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->hdr_len + pkt->payload_len;
}

void net_tx_pkt_dump(struct NetTxPkt *pkt)
{
#ifdef NET_TX_PKT_DEBUG
    assert(pkt);

    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, "
           "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
           pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
           pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}

void net_tx_pkt_reset(struct NetTxPkt *pkt,
                      NetTxPktFreeFrag callback, void *context)
{
    int i;

    /* no assert, as reset can be called before tx_pkt_init */
    if (!pkt) {
        return;
    }

    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));

    assert(pkt->vec);

    pkt->payload_len = 0;
    pkt->payload_frags = 0;

    if (pkt->max_raw_frags > 0) {
        assert(pkt->raw);
        for (i = 0; i < pkt->raw_frags; i++) {
            assert(pkt->raw[i].iov_base);
            callback(context, pkt->raw[i].iov_base, pkt->raw[i].iov_len);
        }
    }
    pkt->raw_frags = 0;

    pkt->hdr_len = 0;
    pkt->l4proto = 0;
}

void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len)
{
    pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0);
}

bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev,
                                     dma_addr_t pa, size_t len)
{
    dma_addr_t mapped_len = len;
    void *base = pci_dma_map(pci_dev, pa, &mapped_len,
                             DMA_DIRECTION_TO_DEVICE);
    if (!base) {
        return false;
    }

    if (mapped_len != len || !net_tx_pkt_add_raw_fragment(pkt, base, len)) {
        net_tx_pkt_unmap_frag_pci(pci_dev, base, mapped_len);
        return false;
    }

    return true;
}

static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt,
                                  struct iovec *iov,
                                  uint32_t iov_len, /* num of iovec, without vhdr */
                                  uint16_t csl)
{
    uint32_t csum_cntr;
    uint16_t csum = 0;
    uint32_t cso;
    size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
    uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len);

    /* Put zero to checksum field */
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);

    /* Calculate L4 TCP/UDP checksum */
    csum_cntr = 0;
    cso = 0;
    /* add pseudo header to csum */
    if (l3_proto == ETH_P_IP) {
        csum_cntr = eth_calc_ip4_pseudo_hdr_csum(
                pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                csl, &cso);
    } else if (l3_proto == ETH_P_IPV6) {
        csum_cntr = eth_calc_ip6_pseudo_hdr_csum(
                pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                csl, pkt->l4proto, &cso);
    }

    /* data checksum */
    csum_cntr +=
        net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso);

    /* Put the checksum obtained into the packet */
    csum = cpu_to_be16(net_checksum_finish_nozero(csum_cntr));
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}
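
/*
 * net_checksum_finish_nozero() folds the 32-bit accumulator above into 16
 * bits (carries are added back into the low word), complements the result,
 * and substitutes 0xffff for an all-zero value, since a zero UDP checksum
 * means "no checksum" (RFC 768).
 */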

#define NET_MAX_FRAG_SG_LIST (64)

static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
    int *src_idx, size_t *src_offset, size_t src_len,
    struct iovec *dst, int *dst_idx)
{
    size_t fetched = 0;
    struct iovec *src = pkt->vec;

    while (fetched < src_len) {

        /* no more place in fragment iov */
        if (*dst_idx == NET_MAX_FRAG_SG_LIST) {
            break;
        }

        /* no more data in iovec */
        if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) {
            break;
        }

        dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
        dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
                                    src_len - fetched);

        *src_offset += dst[*dst_idx].iov_len;
        fetched += dst[*dst_idx].iov_len;

        if (*src_offset == src[*src_idx].iov_len) {
            *src_offset = 0;
            (*src_idx)++;
        }

        (*dst_idx)++;
    }

    return fetched;
}
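
/*
 * Gather example (illustrative): with src_len (the gso_size) at 1460 and
 * payload iovecs of 1000 and 2000 bytes, the first call fills two dst
 * entries (1000 + 460 bytes), the second a single 1460-byte entry, and the
 * third the remaining 80 bytes.
 */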

static void net_tx_pkt_sendv(
    void *opaque, const struct iovec *iov, int iov_cnt,
    const struct iovec *virt_iov, int virt_iov_cnt)
{
    NetClientState *nc = opaque;

    if (qemu_get_using_vnet_hdr(nc->peer)) {
        qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
    } else {
        qemu_sendv_packet(nc, iov, iov_cnt);
    }
}

static bool net_tx_pkt_tcp_fragment_init(struct NetTxPkt *pkt,
                                         struct iovec *fragment,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx,
                                         size_t *src_offset,
                                         size_t *src_len)
{
    struct iovec *l4 = fragment + NET_TX_PKT_PL_START_FRAG;
    size_t bytes_read = 0;
    struct tcp_hdr *th;

    if (!pkt->payload_frags) {
        return false;
    }

    l4->iov_len = pkt->virt_hdr.hdr_len - pkt->hdr_len;
    l4->iov_base = g_malloc(l4->iov_len);

    *src_idx = NET_TX_PKT_PL_START_FRAG;
    while (pkt->vec[*src_idx].iov_len < l4->iov_len - bytes_read) {
        memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
               pkt->vec[*src_idx].iov_len);

        bytes_read += pkt->vec[*src_idx].iov_len;

        (*src_idx)++;
        if (*src_idx >= pkt->payload_frags + NET_TX_PKT_PL_START_FRAG) {
            g_free(l4->iov_base);
            return false;
        }
    }

    *src_offset = l4->iov_len - bytes_read;
    memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
           *src_offset);

    th = l4->iov_base;
    th->th_flags &= ~(TH_FIN | TH_PUSH);

    *pl_idx = NET_TX_PKT_PL_START_FRAG + 1;
    *l4hdr_len = l4->iov_len;
    *src_len = pkt->virt_hdr.gso_size;

    return true;
}
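
/*
 * TH_FIN and TH_PUSH are stripped from the copied TCP header above because
 * segments synthesized from the middle of a split packet must not carry
 * them.
 */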

static void net_tx_pkt_tcp_fragment_deinit(struct iovec *fragment)
{
    g_free(fragment[NET_TX_PKT_PL_START_FRAG].iov_base);
}

static void net_tx_pkt_tcp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_len,
                                        uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct ip6_header *ip6 = l3hdr->iov_base;
    size_t len = l3hdr->iov_len + l4hdr->iov_len + fragment_len;

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
        ip->ip_len = cpu_to_be16(len);
        eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV6:
        len -= sizeof(struct ip6_header);
        ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = cpu_to_be16(len);
        break;
    }
}
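
/*
 * In the IPv6 case above, the fixed 40-byte IPv6 header is subtracted out
 * because ip6_plen counts the payload only, not the base header.
 */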

static void net_tx_pkt_tcp_fragment_advance(struct NetTxPkt *pkt,
                                            struct iovec *fragment,
                                            size_t fragment_len,
                                            uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct tcp_hdr *th = l4hdr->iov_base;

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4) {
        ip->ip_id = cpu_to_be16(be16_to_cpu(ip->ip_id) + 1);
    }

    th->th_seq = cpu_to_be32(be32_to_cpu(th->th_seq) + fragment_len);
    th->th_flags &= ~TH_CWR;
}
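
/*
 * Clearing TH_CWR above follows the GSO convention for ECN
 * (VIRTIO_NET_HDR_GSO_ECN): only the first segment generated from a packet
 * may carry the CWR flag.
 */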

static void net_tx_pkt_udp_fragment_init(struct NetTxPkt *pkt,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx, size_t *src_offset,
                                         size_t *src_len)
{
    *pl_idx = NET_TX_PKT_PL_START_FRAG;
    *l4hdr_len = 0;
    *src_idx = NET_TX_PKT_PL_START_FRAG;
    *src_offset = 0;
    *src_len = IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size);
}

static void net_tx_pkt_udp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_offset,
                                        size_t fragment_len)
{
    bool more_frags = fragment_offset + fragment_len < pkt->payload_len;
    uint16_t orig_flags;
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    uint16_t frag_off_units = fragment_offset / IP_FRAG_UNIT_SIZE;
    uint16_t new_ip_off;

    assert(fragment_offset % IP_FRAG_UNIT_SIZE == 0);
    assert((frag_off_units & ~IP_OFFMASK) == 0);

    orig_flags = be16_to_cpu(ip->ip_off) & ~(IP_OFFMASK | IP_MF);
    new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
    ip->ip_off = cpu_to_be16(new_ip_off);
    ip->ip_len = cpu_to_be16(l3hdr->iov_len + fragment_len);

    eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
}
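
/*
 * The IPv4 fragment offset field counts 8-byte units (IP_FRAG_UNIT_SIZE),
 * which is why the asserts above demand 8-byte alignment; IP_MF ("more
 * fragments") is set on every fragment except the last.
 */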

static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
                                           NetTxPktSend callback,
                                           void *context)
{
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    struct iovec fragment[NET_MAX_FRAG_SG_LIST];
    size_t fragment_len;
    size_t l4hdr_len;
    size_t src_len;

    int src_idx, dst_idx, pl_idx;
    size_t src_offset;
    size_t fragment_offset = 0;
    struct virtio_net_hdr virt_hdr = {
        .flags = pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM ?
                 VIRTIO_NET_HDR_F_DATA_VALID : 0
    };

    /* Copy headers */
    fragment[NET_TX_PKT_VHDR_FRAG].iov_base = &virt_hdr;
    fragment[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof(virt_hdr);
    fragment[NET_TX_PKT_L2HDR_FRAG] = pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    fragment[NET_TX_PKT_L3HDR_FRAG] = pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        if (!net_tx_pkt_tcp_fragment_init(pkt, fragment, &pl_idx, &l4hdr_len,
                                          &src_idx, &src_offset, &src_len)) {
            return false;
        }
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                              pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                              pkt->payload_len);
        net_tx_pkt_udp_fragment_init(pkt, &pl_idx, &l4hdr_len,
                                     &src_idx, &src_offset, &src_len);
        break;

    default:
        abort();
    }

    /* Put as much data as possible and send */
    while (true) {
        dst_idx = pl_idx;
        fragment_len = net_tx_pkt_fetch_fragment(pkt,
            &src_idx, &src_offset, src_len, fragment, &dst_idx);
        if (!fragment_len) {
            break;
        }

        switch (gso_type) {
        case VIRTIO_NET_HDR_GSO_TCPV4:
        case VIRTIO_NET_HDR_GSO_TCPV6:
            net_tx_pkt_tcp_fragment_fix(pkt, fragment, fragment_len, gso_type);
            net_tx_pkt_do_sw_csum(pkt, fragment + NET_TX_PKT_L2HDR_FRAG,
                                  dst_idx - NET_TX_PKT_L2HDR_FRAG,
                                  l4hdr_len + fragment_len);
            break;

        case VIRTIO_NET_HDR_GSO_UDP:
            net_tx_pkt_udp_fragment_fix(pkt, fragment, fragment_offset,
                                        fragment_len);
            break;
        }

        callback(context,
                 fragment + NET_TX_PKT_L2HDR_FRAG,
                 dst_idx - NET_TX_PKT_L2HDR_FRAG,
                 fragment + NET_TX_PKT_VHDR_FRAG,
                 dst_idx - NET_TX_PKT_VHDR_FRAG);

        if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
            gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
            net_tx_pkt_tcp_fragment_advance(pkt, fragment, fragment_len,
                                            gso_type);
        }

        fragment_offset += fragment_len;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        net_tx_pkt_tcp_fragment_deinit(fragment);
    }

    return true;
}

bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
{
    bool offload = qemu_get_using_vnet_hdr(nc->peer);
    return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
}

bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
                            NetTxPktSend callback, void *context)
{
    assert(pkt);

    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    /*
     * Since the underlying infrastructure does not support IP datagrams
     * longer than 64K, drop such packets and do not even try to send them.
     */
    if (gso_type != VIRTIO_NET_HDR_GSO_NONE) {
        if (pkt->payload_len >
            ETH_MAX_IP_DGRAM_LEN -
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
            return false;
        }
    }

    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
        if (!offload && (pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
            net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                  pkt->payload_frags +
                                  NET_TX_PKT_PL_START_FRAG - 1,
                                  pkt->payload_len);
        }

        net_tx_pkt_fix_ip6_payload_len(pkt);
        callback(context, pkt->vec + NET_TX_PKT_L2HDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG -
                 NET_TX_PKT_L2HDR_FRAG,
                 pkt->vec + NET_TX_PKT_VHDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG -
                 NET_TX_PKT_VHDR_FRAG);
        return true;
    }

    return net_tx_pkt_do_sw_fragmentation(pkt, callback, context);
}

void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
{
    struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
        /*
         * TODO: if QEMU ever supports packets larger than 64K, add a jumbo
         * option check here, something like:
         * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
         */
        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
            if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
            }
            /*
             * TODO: if QEMU ever supports packets larger than 64K, add the
             * jumbo option for packets greater than 65,535 bytes.
             */
        }
    }
}