]> git.proxmox.com Git - mirror_qemu.git/blob - net/eth.c
Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging
[mirror_qemu.git] / net / eth.c
1 /*
2 * QEMU network structures definitions and helper functions
3 *
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5 *
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
7 *
8 * Authors:
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
15 *
16 */
17
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "net/eth.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23
24 void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
25 uint16_t vlan_tag, uint16_t vlan_ethtype)
26 {
27 struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
28
29 memmove(vhdr + 1, vhdr, *ehdr_size - ETH_HLEN);
30 vhdr->h_tci = cpu_to_be16(vlan_tag);
31 vhdr->h_proto = ehdr->h_proto;
32 ehdr->h_proto = cpu_to_be16(vlan_ethtype);
33 *ehdr_size += sizeof(*vhdr);
34 }
35
36 uint8_t
37 eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
38 {
39 uint8_t ecn_state = 0;
40
41 if (l3_proto == ETH_P_IP) {
42 struct ip_header *iphdr = (struct ip_header *) l3_hdr;
43
44 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
45 if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
46 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
47 }
48 if (l4proto == IP_PROTO_TCP) {
49 return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
50 } else if (l4proto == IP_PROTO_UDP) {
51 return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
52 }
53 }
54 } else if (l3_proto == ETH_P_IPV6) {
55 struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
56
57 if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
58 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
59 }
60
61 if (l4proto == IP_PROTO_TCP) {
62 return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
63 }
64 }
65 qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
66 "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
67
68 return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
69 }
70
71 uint16_t
72 eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
73 {
74 uint16_t proto;
75 size_t copied;
76 size_t size = iov_size(l2hdr_iov, iovcnt);
77 size_t proto_offset = l2hdr_len - sizeof(proto);
78
79 if (size < proto_offset) {
80 return ETH_P_UNKNOWN;
81 }
82
83 copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
84 &proto, sizeof(proto));
85
86 return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
87 }
88
/*
 * Copy @length bytes located at @offset in the vector into @buffer.
 *
 * @input_size is the total size of the vector as computed by the caller.
 * Returns true only if @offset does not exceed @input_size and the full
 * @length bytes were copied.
 */
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    return input_size >= offset &&
           iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
}
109
110 static bool
111 _eth_tcp_has_data(bool is_ip4,
112 const struct ip_header *ip4_hdr,
113 const struct ip6_header *ip6_hdr,
114 size_t full_ip6hdr_len,
115 const struct tcp_header *tcp)
116 {
117 uint32_t l4len;
118
119 if (is_ip4) {
120 l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
121 } else {
122 size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
123 l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
124 }
125
126 return l4len > TCP_HEADER_DATA_OFFSET(tcp);
127 }
128
129 void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
130 bool *hasip4, bool *hasip6,
131 size_t *l3hdr_off,
132 size_t *l4hdr_off,
133 size_t *l5hdr_off,
134 eth_ip6_hdr_info *ip6hdr_info,
135 eth_ip4_hdr_info *ip4hdr_info,
136 eth_l4_hdr_info *l4hdr_info)
137 {
138 int proto;
139 bool fragment = false;
140 size_t input_size = iov_size(iov, iovcnt);
141 size_t copied;
142 uint8_t ip_p;
143
144 *hasip4 = *hasip6 = false;
145 *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff);
146 l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;
147
148 proto = eth_get_l3_proto(iov, iovcnt, *l3hdr_off);
149
150 if (proto == ETH_P_IP) {
151 struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
152
153 if (input_size < *l3hdr_off) {
154 return;
155 }
156
157 copied = iov_to_buf(iov, iovcnt, *l3hdr_off, iphdr, sizeof(*iphdr));
158 if (copied < sizeof(*iphdr) ||
159 IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) {
160 return;
161 }
162
163 *hasip4 = true;
164 ip_p = iphdr->ip_p;
165 ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
166 *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(iphdr);
167
168 fragment = ip4hdr_info->fragment;
169 } else if (proto == ETH_P_IPV6) {
170 if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) {
171 return;
172 }
173
174 *hasip6 = true;
175 ip_p = ip6hdr_info->l4proto;
176 *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len;
177 fragment = ip6hdr_info->fragment;
178 } else {
179 return;
180 }
181
182 if (fragment) {
183 return;
184 }
185
186 switch (ip_p) {
187 case IP_PROTO_TCP:
188 if (_eth_copy_chunk(input_size,
189 iov, iovcnt,
190 *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
191 &l4hdr_info->hdr.tcp)) {
192 l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
193 *l5hdr_off = *l4hdr_off +
194 TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
195
196 l4hdr_info->has_tcp_data =
197 _eth_tcp_has_data(proto == ETH_P_IP,
198 &ip4hdr_info->ip4_hdr,
199 &ip6hdr_info->ip6_hdr,
200 *l4hdr_off - *l3hdr_off,
201 &l4hdr_info->hdr.tcp);
202 }
203 break;
204
205 case IP_PROTO_UDP:
206 if (_eth_copy_chunk(input_size,
207 iov, iovcnt,
208 *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
209 &l4hdr_info->hdr.udp)) {
210 l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
211 *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
212 }
213 break;
214
215 case IP_PROTO_SCTP:
216 l4hdr_info->proto = ETH_L4_HDR_PROTO_SCTP;
217 break;
218 }
219 }
220
221 size_t
222 eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
223 void *new_ehdr_buf,
224 uint16_t *payload_offset, uint16_t *tci)
225 {
226 struct vlan_header vlan_hdr;
227 struct eth_header *new_ehdr = new_ehdr_buf;
228
229 size_t copied = iov_to_buf(iov, iovcnt, iovoff,
230 new_ehdr, sizeof(*new_ehdr));
231
232 if (copied < sizeof(*new_ehdr)) {
233 return 0;
234 }
235
236 switch (be16_to_cpu(new_ehdr->h_proto)) {
237 case ETH_P_VLAN:
238 case ETH_P_DVLAN:
239 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
240 &vlan_hdr, sizeof(vlan_hdr));
241
242 if (copied < sizeof(vlan_hdr)) {
243 return 0;
244 }
245
246 new_ehdr->h_proto = vlan_hdr.h_proto;
247
248 *tci = be16_to_cpu(vlan_hdr.h_tci);
249 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
250
251 if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
252
253 copied = iov_to_buf(iov, iovcnt, *payload_offset,
254 PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
255
256 if (copied < sizeof(vlan_hdr)) {
257 return 0;
258 }
259
260 *payload_offset += sizeof(vlan_hdr);
261
262 return sizeof(struct eth_header) + sizeof(struct vlan_header);
263 } else {
264 return sizeof(struct eth_header);
265 }
266 default:
267 return 0;
268 }
269 }
270
271 size_t
272 eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index,
273 uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf,
274 uint16_t *payload_offset, uint16_t *tci)
275 {
276 struct vlan_header vlan_hdr;
277 uint16_t *new_ehdr_proto;
278 size_t new_ehdr_size;
279 size_t copied;
280
281 switch (index) {
282 case 0:
283 new_ehdr_proto = &PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto;
284 new_ehdr_size = sizeof(struct eth_header);
285 copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
286 break;
287
288 case 1:
289 new_ehdr_proto = &PKT_GET_VLAN_HDR(new_ehdr_buf)->h_proto;
290 new_ehdr_size = sizeof(struct eth_header) + sizeof(struct vlan_header);
291 copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
292 if (be16_to_cpu(PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto) != vet_ext) {
293 return 0;
294 }
295 break;
296
297 default:
298 return 0;
299 }
300
301 if (copied < new_ehdr_size || be16_to_cpu(*new_ehdr_proto) != vet) {
302 return 0;
303 }
304
305 copied = iov_to_buf(iov, iovcnt, iovoff + new_ehdr_size,
306 &vlan_hdr, sizeof(vlan_hdr));
307 if (copied < sizeof(vlan_hdr)) {
308 return 0;
309 }
310
311 *new_ehdr_proto = vlan_hdr.h_proto;
312 *payload_offset = iovoff + new_ehdr_size + sizeof(vlan_hdr);
313 *tci = be16_to_cpu(vlan_hdr.h_tci);
314
315 return new_ehdr_size;
316 }
317
318 void
319 eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
320 {
321 struct ip_header *iphdr = (struct ip_header *) l3hdr;
322 iphdr->ip_sum = 0;
323 iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
324 }
325
326 uint32_t
327 eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
328 uint16_t csl,
329 uint32_t *cso)
330 {
331 struct ip_pseudo_header ipph;
332 ipph.ip_src = iphdr->ip_src;
333 ipph.ip_dst = iphdr->ip_dst;
334 ipph.ip_payload = cpu_to_be16(csl);
335 ipph.ip_proto = iphdr->ip_p;
336 ipph.zeros = 0;
337 *cso = sizeof(ipph);
338 return net_checksum_add(*cso, (uint8_t *) &ipph);
339 }
340
341 uint32_t
342 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
343 uint16_t csl,
344 uint8_t l4_proto,
345 uint32_t *cso)
346 {
347 struct ip6_pseudo_header ipph;
348 ipph.ip6_src = iphdr->ip6_src;
349 ipph.ip6_dst = iphdr->ip6_dst;
350 ipph.len = cpu_to_be16(csl);
351 ipph.zero[0] = 0;
352 ipph.zero[1] = 0;
353 ipph.zero[2] = 0;
354 ipph.next_hdr = l4_proto;
355 *cso = sizeof(ipph);
356 return net_checksum_add(*cso, (uint8_t *)&ipph);
357 }
358
359 static bool
360 eth_is_ip6_extension_header_type(uint8_t hdr_type)
361 {
362 switch (hdr_type) {
363 case IP6_HOP_BY_HOP:
364 case IP6_ROUTING:
365 case IP6_FRAGMENT:
366 case IP6_AUTHENTICATION:
367 case IP6_DESTINATON:
368 case IP6_MOBILITY:
369 return true;
370 default:
371 return false;
372 }
373 }
374
375 static bool
376 _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
377 size_t ext_hdr_offset,
378 struct ip6_ext_hdr *ext_hdr,
379 struct in6_address *dst_addr)
380 {
381 struct ip6_ext_hdr_routing rt_hdr;
382 size_t input_size = iov_size(pkt, pkt_frags);
383 size_t bytes_read;
384
385 if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
386 return false;
387 }
388
389 bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
390 &rt_hdr, sizeof(rt_hdr));
391 assert(bytes_read == sizeof(rt_hdr));
392 if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
393 return false;
394 }
395 bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
396 dst_addr, sizeof(*dst_addr));
397 assert(bytes_read == sizeof(*dst_addr));
398
399 return true;
400 }
401
402 static bool
403 _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
404 size_t dsthdr_offset,
405 struct ip6_ext_hdr *ext_hdr,
406 struct in6_address *src_addr)
407 {
408 size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
409 struct ip6_option_hdr opthdr;
410 size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
411
412 while (bytes_left > sizeof(opthdr)) {
413 size_t input_size = iov_size(pkt, pkt_frags);
414 size_t bytes_read, optlen;
415
416 if (input_size < opt_offset) {
417 return false;
418 }
419
420 bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
421 &opthdr, sizeof(opthdr));
422
423 if (bytes_read != sizeof(opthdr)) {
424 return false;
425 }
426
427 optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
428 : (opthdr.len + sizeof(opthdr));
429
430 if (optlen > bytes_left) {
431 return false;
432 }
433
434 if (opthdr.type == IP6_OPT_HOME) {
435 size_t input_size = iov_size(pkt, pkt_frags);
436
437 if (input_size < opt_offset + sizeof(opthdr)) {
438 return false;
439 }
440
441 bytes_read = iov_to_buf(pkt, pkt_frags,
442 opt_offset + sizeof(opthdr),
443 src_addr, sizeof(*src_addr));
444
445 return bytes_read == sizeof(*src_addr);
446 }
447
448 opt_offset += optlen;
449 bytes_left -= optlen;
450 }
451
452 return false;
453 }
454
455 bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
456 size_t ip6hdr_off, eth_ip6_hdr_info *info)
457 {
458 struct ip6_ext_hdr ext_hdr;
459 size_t bytes_read;
460 uint8_t curr_ext_hdr_type;
461 size_t input_size = iov_size(pkt, pkt_frags);
462
463 info->rss_ex_dst_valid = false;
464 info->rss_ex_src_valid = false;
465 info->fragment = false;
466
467 if (input_size < ip6hdr_off) {
468 return false;
469 }
470
471 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
472 &info->ip6_hdr, sizeof(info->ip6_hdr));
473 if (bytes_read < sizeof(info->ip6_hdr)) {
474 return false;
475 }
476
477 info->full_hdr_len = sizeof(struct ip6_header);
478
479 curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
480
481 if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
482 info->l4proto = info->ip6_hdr.ip6_nxt;
483 info->has_ext_hdrs = false;
484 return true;
485 }
486
487 info->has_ext_hdrs = true;
488
489 do {
490 if (input_size < ip6hdr_off + info->full_hdr_len) {
491 return false;
492 }
493
494 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
495 &ext_hdr, sizeof(ext_hdr));
496
497 if (bytes_read < sizeof(ext_hdr)) {
498 return false;
499 }
500
501 if (curr_ext_hdr_type == IP6_ROUTING) {
502 if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
503 info->rss_ex_dst_valid =
504 _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
505 ip6hdr_off + info->full_hdr_len,
506 &ext_hdr, &info->rss_ex_dst);
507 }
508 } else if (curr_ext_hdr_type == IP6_DESTINATON) {
509 info->rss_ex_src_valid =
510 _eth_get_rss_ex_src_addr(pkt, pkt_frags,
511 ip6hdr_off + info->full_hdr_len,
512 &ext_hdr, &info->rss_ex_src);
513 } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
514 info->fragment = true;
515 }
516
517 info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
518 curr_ext_hdr_type = ext_hdr.ip6r_nxt;
519 } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
520
521 info->l4proto = ext_hdr.ip6r_nxt;
522 return true;
523 }
524
525 bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
526 const void *pkt, size_t pkt_size)
527 {
528 assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
529
530 if (pkt_size >= ETH_ZLEN) {
531 return false;
532 }
533
534 /* pad to minimum Ethernet frame length */
535 memcpy(padded_pkt, pkt, pkt_size);
536 memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
537 *padded_buflen = ETH_ZLEN;
538
539 return true;
540 }