1 /*
2 * QEMU VMWARE VMXNET3 paravirtual NIC
3 *
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5 *
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
7 *
8 * Authors:
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2.
14 * See the COPYING file in the top-level directory.
15 *
16 */
17
18 #include "hw/hw.h"
19 #include "hw/pci/pci.h"
20 #include "net/net.h"
21 #include "net/tap.h"
22 #include "net/checksum.h"
23 #include "sysemu/sysemu.h"
24 #include "qemu-common.h"
25 #include "qemu/bswap.h"
26 #include "hw/pci/msix.h"
27 #include "hw/pci/msi.h"
28
29 #include "vmxnet3.h"
30 #include "vmxnet_debug.h"
31 #include "vmware_utils.h"
32 #include "vmxnet_tx_pkt.h"
33 #include "vmxnet_rx_pkt.h"
34
35 #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
36 #define VMXNET3_MSIX_BAR_SIZE 0x2000
37 #define MIN_BUF_SIZE 60
38
39 /* Compatibility flags for migration */
40 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
41 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
42 (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
43
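/*
 * The non-compat values below are where current QEMU places the MSI and
 * MSI-X capabilities (and the MSI-X PBA); the compat flag keeps the offsets
 * hard-coded by older QEMU versions so that the PCI config space and MSI-X
 * BAR layout stay identical when migrating from an older machine type.
 */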
44 #define VMXNET3_MSI_OFFSET(s) \
45 ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
46 #define VMXNET3_MSIX_OFFSET(s) \
47 ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
48
49 #define VMXNET3_BAR0_IDX (0)
50 #define VMXNET3_BAR1_IDX (1)
51 #define VMXNET3_MSIX_BAR_IDX (2)
52
53 #define VMXNET3_OFF_MSIX_TABLE (0x000)
54 #define VMXNET3_OFF_MSIX_PBA(s) \
55 ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
56
57 /* Link speed in Mbps is reported in the upper 16 bits (shifted left by 16) */
58 #define VMXNET3_LINK_SPEED (1000 << 16)
59
60 /* Link status: 1 - up, 0 - down. */
61 #define VMXNET3_LINK_STATUS_UP 0x1
62
63 /* Least significant bit should be set for revision and version */
64 #define VMXNET3_UPT_REVISION 0x1
65 #define VMXNET3_DEVICE_REVISION 0x1
66
67 /* Number of interrupt vectors for non-MSIx modes */
68 #define VMXNET3_MAX_NMSIX_INTRS (1)
69
70 /* Macros for ring descriptor access */
71 #define VMXNET3_READ_TX_QUEUE_DESCR8(dpa, field) \
72 (vmw_shmem_ld8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
73
74 #define VMXNET3_WRITE_TX_QUEUE_DESCR8(dpa, field, value) \
75 (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
76
77 #define VMXNET3_READ_TX_QUEUE_DESCR32(dpa, field) \
78 (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
79
80 #define VMXNET3_WRITE_TX_QUEUE_DESCR32(dpa, field, value) \
81 (vmw_shmem_st32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
82
83 #define VMXNET3_READ_TX_QUEUE_DESCR64(dpa, field) \
84 (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
85
86 #define VMXNET3_WRITE_TX_QUEUE_DESCR64(dpa, field, value) \
87 (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
88
89 #define VMXNET3_READ_RX_QUEUE_DESCR64(dpa, field) \
90 (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
91
92 #define VMXNET3_READ_RX_QUEUE_DESCR32(dpa, field) \
93 (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
94
95 #define VMXNET3_WRITE_RX_QUEUE_DESCR64(dpa, field, value) \
96 (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
97
98 #define VMXNET3_WRITE_RX_QUEUE_DESCR8(dpa, field, value) \
99 (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
100
101 /* Macros for guest driver shared area access */
102 #define VMXNET3_READ_DRV_SHARED64(shpa, field) \
103 (vmw_shmem_ld64(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
104
105 #define VMXNET3_READ_DRV_SHARED32(shpa, field) \
106 (vmw_shmem_ld32(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
107
108 #define VMXNET3_WRITE_DRV_SHARED32(shpa, field, val) \
109 (vmw_shmem_st32(shpa + offsetof(struct Vmxnet3_DriverShared, field), val))
110
111 #define VMXNET3_READ_DRV_SHARED16(shpa, field) \
112 (vmw_shmem_ld16(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
113
114 #define VMXNET3_READ_DRV_SHARED8(shpa, field) \
115 (vmw_shmem_ld8(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
116
117 #define VMXNET3_READ_DRV_SHARED(shpa, field, b, l) \
118 (vmw_shmem_read(shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))
119
120 #define VMXNET_FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
121
122 #define TYPE_VMXNET3 "vmxnet3"
123 #define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)
124
125 /* Cyclic ring abstraction */
126 typedef struct {
127 hwaddr pa;
128 size_t size;
129 size_t cell_size;
130 size_t next;
131 uint8_t gen;
132 } Vmxnet3Ring;
133
134 static inline void vmxnet3_ring_init(Vmxnet3Ring *ring,
135 hwaddr pa,
136 size_t size,
137 size_t cell_size,
138 bool zero_region)
139 {
140 ring->pa = pa;
141 ring->size = size;
142 ring->cell_size = cell_size;
143 ring->gen = VMXNET3_INIT_GEN;
144 ring->next = 0;
145
146 if (zero_region) {
147 vmw_shmem_set(pa, 0, size * cell_size);
148 }
149 }
150
151 #define VMXNET3_RING_DUMP(macro, ring_name, ridx, r) \
152 macro("%s#%d: base %" PRIx64 " size %zu cell_size %zu gen %d next %zu", \
153 (ring_name), (ridx), \
154 (r)->pa, (r)->size, (r)->cell_size, (r)->gen, (r)->next)
155
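/*
 * Each ring keeps a generation bit that is flipped every time the ring
 * wraps; descriptors carry the same bit, which lets producer and consumer
 * distinguish freshly written entries from stale ones.
 */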
156 static inline void vmxnet3_ring_inc(Vmxnet3Ring *ring)
157 {
158 if (++ring->next >= ring->size) {
159 ring->next = 0;
160 ring->gen ^= 1;
161 }
162 }
163
164 static inline void vmxnet3_ring_dec(Vmxnet3Ring *ring)
165 {
166 if (ring->next-- == 0) {
167 ring->next = ring->size - 1;
168 ring->gen ^= 1;
169 }
170 }
171
172 static inline hwaddr vmxnet3_ring_curr_cell_pa(Vmxnet3Ring *ring)
173 {
174 return ring->pa + ring->next * ring->cell_size;
175 }
176
177 static inline void vmxnet3_ring_read_curr_cell(Vmxnet3Ring *ring, void *buff)
178 {
179 vmw_shmem_read(vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
180 }
181
182 static inline void vmxnet3_ring_write_curr_cell(Vmxnet3Ring *ring, void *buff)
183 {
184 vmw_shmem_write(vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
185 }
186
187 static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3Ring *ring)
188 {
189 return ring->next;
190 }
191
192 static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3Ring *ring)
193 {
194 return ring->gen;
195 }
196
197 /* Debug trace-related functions */
198 static inline void
199 vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
200 {
201 VMW_PKPRN("TX DESCR: "
202 "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
203 "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
204 "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
205 le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd,
206 descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
207 descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
208 }
209
210 static inline void
211 vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
212 {
213 VMW_PKPRN("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
214 "csum_start: %d, csum_offset: %d",
215 vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
216 vhdr->csum_start, vhdr->csum_offset);
217 }
218
219 static inline void
220 vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
221 {
222 VMW_PKPRN("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
223 "dtype: %d, ext1: %d, btype: %d",
224 le64_to_cpu(descr->addr), descr->len, descr->gen,
225 descr->rsvd, descr->dtype, descr->ext1, descr->btype);
226 }
227
228 /* Device state and helper functions */
229 #define VMXNET3_RX_RINGS_PER_QUEUE (2)
230
231 typedef struct {
232 Vmxnet3Ring tx_ring;
233 Vmxnet3Ring comp_ring;
234
235 uint8_t intr_idx;
236 hwaddr tx_stats_pa;
237 struct UPT1_TxStats txq_stats;
238 } Vmxnet3TxqDescr;
239
240 typedef struct {
241 Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
242 Vmxnet3Ring comp_ring;
243 uint8_t intr_idx;
244 hwaddr rx_stats_pa;
245 struct UPT1_RxStats rxq_stats;
246 } Vmxnet3RxqDescr;
247
248 typedef struct {
249 bool is_masked;
250 bool is_pending;
251 bool is_asserted;
252 } Vmxnet3IntState;
253
254 typedef struct {
255 PCIDevice parent_obj;
256 NICState *nic;
257 NICConf conf;
258 MemoryRegion bar0;
259 MemoryRegion bar1;
260 MemoryRegion msix_bar;
261
262 Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
263 Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
264
265 /* Whether MSI-X support was installed successfully */
266 bool msix_used;
267 /* Whether MSI support was installed successfully */
268 bool msi_used;
269 hwaddr drv_shmem;
270 hwaddr temp_shared_guest_driver_memory;
271
272 uint8_t txq_num;
273
274 /* This boolean tells whether RX packet being indicated has to */
275 /* be split into head and body chunks from different RX rings */
276 bool rx_packets_compound;
277
278 bool rx_vlan_stripping;
279 bool lro_supported;
280
281 uint8_t rxq_num;
282
283 /* Network MTU */
284 uint32_t mtu;
285
286 /* Maximum number of fragments for indicated TX packets */
287 uint32_t max_tx_frags;
288
289 /* Maximum number of fragments for indicated RX packets */
290 uint16_t max_rx_frags;
291
292 /* Index for events interrupt */
293 uint8_t event_int_idx;
294
295 /* Whether automatic interrupt masking is enabled */
296 bool auto_int_masking;
297
298 bool peer_has_vhdr;
299
300 /* TX packets to QEMU interface */
301 struct VmxnetTxPkt *tx_pkt;
302 uint32_t offload_mode;
303 uint32_t cso_or_gso_size;
304 uint16_t tci;
305 bool needs_vlan;
306
307 struct VmxnetRxPkt *rx_pkt;
308
309 bool tx_sop;
310 bool skip_current_tx_pkt;
311
312 uint32_t device_active;
313 uint32_t last_command;
314
315 uint32_t link_status_and_speed;
316
317 Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];
318
319 uint32_t temp_mac; /* To store the low part first */
320
321 MACAddr perm_mac;
322 uint32_t vlan_table[VMXNET3_VFT_SIZE];
323 uint32_t rx_mode;
324 MACAddr *mcast_list;
325 uint32_t mcast_list_len;
326 uint32_t mcast_list_buff_size; /* needed for live migration. */
327
328 /* Compatibility flags for migration */
329 uint32_t compat_flags;
330 } VMXNET3State;
331
332 /* Interrupt management */
333
334 /*
335 * This function returns whether the interrupt line is left in the
336 * asserted state. This depends on the type of interrupt used: for INTx
337 * the line stays asserted until it is explicitly deasserted, while for
338 * MSI(X) the line is deasserted automatically due to the notification
339 * semantics of MSI(X) interrupts.
340 */
341 static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
342 {
343 PCIDevice *d = PCI_DEVICE(s);
344
345 if (s->msix_used && msix_enabled(d)) {
346 VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
347 msix_notify(d, int_idx);
348 return false;
349 }
350 if (s->msi_used && msi_enabled(d)) {
351 VMW_IRPRN("Sending MSI notification for vector %u", int_idx);
352 msi_notify(d, int_idx);
353 return false;
354 }
355
356 VMW_IRPRN("Asserting line for interrupt %u", int_idx);
357 pci_irq_assert(d);
358 return true;
359 }
360
361 static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
362 {
363 PCIDevice *d = PCI_DEVICE(s);
364
365 /*
366 * This function should never be called for MSI(X) interrupts
367 * because deassertion is never required for message interrupts
368 */
369 assert(!s->msix_used || !msix_enabled(d));
370 /*
371 * This function should never be called for MSI(X) interrupts
372 * because deassertion is never required for message interrupts
373 */
374 assert(!s->msi_used || !msi_enabled(d));
375
376 VMW_IRPRN("Deasserting line for interrupt %u", lidx);
377 pci_irq_deassert(d);
378 }
379
380 static void vmxnet3_update_interrupt_line_state(VMXNET3State *s, int lidx)
381 {
382 if (!s->interrupt_states[lidx].is_pending &&
383 s->interrupt_states[lidx].is_asserted) {
384 VMW_IRPRN("New interrupt line state for index %d is DOWN", lidx);
385 _vmxnet3_deassert_interrupt_line(s, lidx);
386 s->interrupt_states[lidx].is_asserted = false;
387 return;
388 }
389
390 if (s->interrupt_states[lidx].is_pending &&
391 !s->interrupt_states[lidx].is_masked &&
392 !s->interrupt_states[lidx].is_asserted) {
393 VMW_IRPRN("New interrupt line state for index %d is UP", lidx);
394 s->interrupt_states[lidx].is_asserted =
395 _vmxnet3_assert_interrupt_line(s, lidx);
396 s->interrupt_states[lidx].is_pending = false;
397 return;
398 }
399 }
400
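/*
 * Mark the interrupt pending and update the line state. When automatic
 * interrupt masking is enabled, the vector is masked right after delivery;
 * the guest driver re-enables it by writing the corresponding IMR register.
 */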
401 static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
402 {
403 PCIDevice *d = PCI_DEVICE(s);
404 s->interrupt_states[lidx].is_pending = true;
405 vmxnet3_update_interrupt_line_state(s, lidx);
406
407 if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
408 goto do_automask;
409 }
410
411 if (s->msi_used && msi_enabled(d) && s->auto_int_masking) {
412 goto do_automask;
413 }
414
415 return;
416
417 do_automask:
418 s->interrupt_states[lidx].is_masked = true;
419 vmxnet3_update_interrupt_line_state(s, lidx);
420 }
421
422 static bool vmxnet3_interrupt_asserted(VMXNET3State *s, int lidx)
423 {
424 return s->interrupt_states[lidx].is_asserted;
425 }
426
427 static void vmxnet3_clear_interrupt(VMXNET3State *s, int int_idx)
428 {
429 s->interrupt_states[int_idx].is_pending = false;
430 if (s->auto_int_masking) {
431 s->interrupt_states[int_idx].is_masked = true;
432 }
433 vmxnet3_update_interrupt_line_state(s, int_idx);
434 }
435
436 static void
437 vmxnet3_on_interrupt_mask_changed(VMXNET3State *s, int lidx, bool is_masked)
438 {
439 s->interrupt_states[lidx].is_masked = is_masked;
440 vmxnet3_update_interrupt_line_state(s, lidx);
441 }
442
443 static bool vmxnet3_verify_driver_magic(hwaddr dshmem)
444 {
445 return (VMXNET3_READ_DRV_SHARED32(dshmem, magic) == VMXNET3_REV1_MAGIC);
446 }
447
448 #define VMXNET3_GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
449 #define VMXNET3_MAKE_BYTE(byte_num, val) \
450 (((uint32_t)((val) & 0xFF)) << (byte_num)*8)
451
452 static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l)
453 {
454 s->conf.macaddr.a[0] = VMXNET3_GET_BYTE(l, 0);
455 s->conf.macaddr.a[1] = VMXNET3_GET_BYTE(l, 1);
456 s->conf.macaddr.a[2] = VMXNET3_GET_BYTE(l, 2);
457 s->conf.macaddr.a[3] = VMXNET3_GET_BYTE(l, 3);
458 s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0);
459 s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1);
460
461 VMW_CFPRN("Variable MAC: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a));
462
463 qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
464 }
465
466 static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
467 {
468 return VMXNET3_MAKE_BYTE(0, addr->a[0]) |
469 VMXNET3_MAKE_BYTE(1, addr->a[1]) |
470 VMXNET3_MAKE_BYTE(2, addr->a[2]) |
471 VMXNET3_MAKE_BYTE(3, addr->a[3]);
472 }
473
474 static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
475 {
476 return VMXNET3_MAKE_BYTE(0, addr->a[4]) |
477 VMXNET3_MAKE_BYTE(1, addr->a[5]);
478 }
479
480 static void
481 vmxnet3_inc_tx_consumption_counter(VMXNET3State *s, int qidx)
482 {
483 vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
484 }
485
486 static inline void
487 vmxnet3_inc_rx_consumption_counter(VMXNET3State *s, int qidx, int ridx)
488 {
489 vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
490 }
491
492 static inline void
493 vmxnet3_inc_tx_completion_counter(VMXNET3State *s, int qidx)
494 {
495 vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
496 }
497
498 static void
499 vmxnet3_inc_rx_completion_counter(VMXNET3State *s, int qidx)
500 {
501 vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
502 }
503
504 static void
505 vmxnet3_dec_rx_completion_counter(VMXNET3State *s, int qidx)
506 {
507 vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
508 }
509
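/*
 * Write a TX completion descriptor referring to the TX descriptor at
 * tx_ridx into the queue's completion ring and raise the queue interrupt.
 */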
510 static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx)
511 {
512 struct Vmxnet3_TxCompDesc txcq_descr;
513
514 VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring);
515
516 txcq_descr.txdIdx = tx_ridx;
517 txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
518
519 vmxnet3_ring_write_curr_cell(&s->txq_descr[qidx].comp_ring, &txcq_descr);
520
521 /* Flush changes in TX descriptor before changing the counter value */
522 smp_wmb();
523
524 vmxnet3_inc_tx_completion_counter(s, qidx);
525 vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
526 }
527
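/*
 * Translate the offload mode requested by the TX descriptor (none, L4
 * checksum, or TSO) into the virtio-net header attached to the packet.
 */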
528 static bool
529 vmxnet3_setup_tx_offloads(VMXNET3State *s)
530 {
531 switch (s->offload_mode) {
532 case VMXNET3_OM_NONE:
533 vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
534 break;
535
536 case VMXNET3_OM_CSUM:
537 vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
538 VMW_PKPRN("L4 CSO requested\n");
539 break;
540
541 case VMXNET3_OM_TSO:
542 vmxnet_tx_pkt_build_vheader(s->tx_pkt, true, true,
543 s->cso_or_gso_size);
544 vmxnet_tx_pkt_update_ip_checksums(s->tx_pkt);
545 VMW_PKPRN("GSO offload requested.");
546 break;
547
548 default:
549 g_assert_not_reached();
550 return false;
551 }
552
553 return true;
554 }
555
556 static void
557 vmxnet3_tx_retrieve_metadata(VMXNET3State *s,
558 const struct Vmxnet3_TxDesc *txd)
559 {
560 s->offload_mode = txd->om;
561 s->cso_or_gso_size = txd->msscof;
562 s->tci = txd->tci;
563 s->needs_vlan = txd->ti;
564 }
565
566 typedef enum {
567 VMXNET3_PKT_STATUS_OK,
568 VMXNET3_PKT_STATUS_ERROR,
569 VMXNET3_PKT_STATUS_DISCARD,/* only for tx */
570 VMXNET3_PKT_STATUS_OUT_OF_BUF /* only for rx */
571 } Vmxnet3PktStatus;
572
573 static void
574 vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
575 Vmxnet3PktStatus status)
576 {
577 size_t tot_len = vmxnet_tx_pkt_get_total_len(s->tx_pkt);
578 struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
579
580 switch (status) {
581 case VMXNET3_PKT_STATUS_OK:
582 switch (vmxnet_tx_pkt_get_packet_type(s->tx_pkt)) {
583 case ETH_PKT_BCAST:
584 stats->bcastPktsTxOK++;
585 stats->bcastBytesTxOK += tot_len;
586 break;
587 case ETH_PKT_MCAST:
588 stats->mcastPktsTxOK++;
589 stats->mcastBytesTxOK += tot_len;
590 break;
591 case ETH_PKT_UCAST:
592 stats->ucastPktsTxOK++;
593 stats->ucastBytesTxOK += tot_len;
594 break;
595 default:
596 g_assert_not_reached();
597 }
598
599 if (s->offload_mode == VMXNET3_OM_TSO) {
600 /*
601 * According to VMWARE headers this statistic is a number
602 * of packets after segmentation but since we don't have
603 * this information in QEMU model, the best we can do is to
604 * provide number of non-segmented packets
605 */
606 stats->TSOPktsTxOK++;
607 stats->TSOBytesTxOK += tot_len;
608 }
609 break;
610
611 case VMXNET3_PKT_STATUS_DISCARD:
612 stats->pktsTxDiscard++;
613 break;
614
615 case VMXNET3_PKT_STATUS_ERROR:
616 stats->pktsTxError++;
617 break;
618
619 default:
620 g_assert_not_reached();
621 }
622 }
623
624 static void
625 vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
626 int qidx,
627 Vmxnet3PktStatus status)
628 {
629 struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
630 size_t tot_len = vmxnet_rx_pkt_get_total_len(s->rx_pkt);
631
632 switch (status) {
633 case VMXNET3_PKT_STATUS_OUT_OF_BUF:
634 stats->pktsRxOutOfBuf++;
635 break;
636
637 case VMXNET3_PKT_STATUS_ERROR:
638 stats->pktsRxError++;
639 break;
640 case VMXNET3_PKT_STATUS_OK:
641 switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) {
642 case ETH_PKT_BCAST:
643 stats->bcastPktsRxOK++;
644 stats->bcastBytesRxOK += tot_len;
645 break;
646 case ETH_PKT_MCAST:
647 stats->mcastPktsRxOK++;
648 stats->mcastBytesRxOK += tot_len;
649 break;
650 case ETH_PKT_UCAST:
651 stats->ucastPktsRxOK++;
652 stats->ucastBytesRxOK += tot_len;
653 break;
654 default:
655 g_assert_not_reached();
656 }
657
658 if (tot_len > s->mtu) {
659 stats->LROPktsRxOK++;
660 stats->LROBytesRxOK += tot_len;
661 }
662 break;
663 default:
664 g_assert_not_reached();
665 }
666 }
667
668 static inline bool
669 vmxnet3_pop_next_tx_descr(VMXNET3State *s,
670 int qidx,
671 struct Vmxnet3_TxDesc *txd,
672 uint32_t *descr_idx)
673 {
674 Vmxnet3Ring *ring = &s->txq_descr[qidx].tx_ring;
675
676 vmxnet3_ring_read_curr_cell(ring, txd);
677 if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
678 /* Only read after generation field verification */
679 smp_rmb();
680 /* Re-read to be sure we got the latest version */
681 vmxnet3_ring_read_curr_cell(ring, txd);
682 VMXNET3_RING_DUMP(VMW_RIPRN, "TX", qidx, ring);
683 *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
684 vmxnet3_inc_tx_consumption_counter(s, qidx);
685 return true;
686 }
687
688 return false;
689 }
690
691 static bool
692 vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
693 {
694 Vmxnet3PktStatus status = VMXNET3_PKT_STATUS_OK;
695
696 if (!vmxnet3_setup_tx_offloads(s)) {
697 status = VMXNET3_PKT_STATUS_ERROR;
698 goto func_exit;
699 }
700
701 /* debug prints */
702 vmxnet3_dump_virt_hdr(vmxnet_tx_pkt_get_vhdr(s->tx_pkt));
703 vmxnet_tx_pkt_dump(s->tx_pkt);
704
705 if (!vmxnet_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
706 status = VMXNET3_PKT_STATUS_DISCARD;
707 goto func_exit;
708 }
709
710 func_exit:
711 vmxnet3_on_tx_done_update_stats(s, qidx, status);
712 return (status == VMXNET3_PKT_STATUS_OK);
713 }
714
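/*
 * Drain the TX ring: accumulate descriptor fragments into tx_pkt; when an
 * EOP descriptor is reached, parse the packet, insert a VLAN header if
 * requested, transmit it and write back a completion descriptor.
 */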
715 static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
716 {
717 struct Vmxnet3_TxDesc txd;
718 uint32_t txd_idx;
719 uint32_t data_len;
720 hwaddr data_pa;
721
722 for (;;) {
723 if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
724 break;
725 }
726
727 vmxnet3_dump_tx_descr(&txd);
728
729 if (!s->skip_current_tx_pkt) {
730 data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
731 data_pa = le64_to_cpu(txd.addr);
732
733 if (!vmxnet_tx_pkt_add_raw_fragment(s->tx_pkt,
734 data_pa,
735 data_len)) {
736 s->skip_current_tx_pkt = true;
737 }
738 }
739
740 if (s->tx_sop) {
741 vmxnet3_tx_retrieve_metadata(s, &txd);
742 s->tx_sop = false;
743 }
744
745 if (txd.eop) {
746 if (!s->skip_current_tx_pkt && vmxnet_tx_pkt_parse(s->tx_pkt)) {
747 if (s->needs_vlan) {
748 vmxnet_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
749 }
750
751 vmxnet3_send_packet(s, qidx);
752 } else {
753 vmxnet3_on_tx_done_update_stats(s, qidx,
754 VMXNET3_PKT_STATUS_ERROR);
755 }
756
757 vmxnet3_complete_packet(s, qidx, txd_idx);
758 s->tx_sop = true;
759 s->skip_current_tx_pkt = false;
760 vmxnet_tx_pkt_reset(s->tx_pkt);
761 }
762 }
763 }
764
765 static inline void
766 vmxnet3_read_next_rx_descr(VMXNET3State *s, int qidx, int ridx,
767 struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
768 {
769 Vmxnet3Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
770 *didx = vmxnet3_ring_curr_cell_idx(ring);
771 vmxnet3_ring_read_curr_cell(ring, dbuf);
772 }
773
774 static inline uint8_t
775 vmxnet3_get_rx_ring_gen(VMXNET3State *s, int qidx, int ridx)
776 {
777 return s->rxq_descr[qidx].rx_ring[ridx].gen;
778 }
779
780 static inline hwaddr
781 vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen)
782 {
783 uint8_t ring_gen;
784 struct Vmxnet3_RxCompDesc rxcd;
785
786 hwaddr daddr =
787 vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);
788
789 cpu_physical_memory_read(daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
790 ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
791
792 if (rxcd.gen != ring_gen) {
793 *descr_gen = ring_gen;
794 vmxnet3_inc_rx_completion_counter(s, qidx);
795 return daddr;
796 }
797
798 return 0;
799 }
800
801 static inline void
802 vmxnet3_revert_rxc_descr(VMXNET3State *s, int qidx)
803 {
804 vmxnet3_dec_rx_completion_counter(s, qidx);
805 }
806
807 #define RXQ_IDX (0)
808 #define RX_HEAD_BODY_RING (0)
809 #define RX_BODY_ONLY_RING (1)
810
811 static bool
812 vmxnet3_get_next_head_rx_descr(VMXNET3State *s,
813 struct Vmxnet3_RxDesc *descr_buf,
814 uint32_t *descr_idx,
815 uint32_t *ridx)
816 {
817 for (;;) {
818 uint32_t ring_gen;
819 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
820 descr_buf, descr_idx);
821
822 /* If no more free descriptors - return */
823 ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
824 if (descr_buf->gen != ring_gen) {
825 return false;
826 }
827
828 /* Only read after generation field verification */
829 smp_rmb();
830 /* Re-read to be sure we got the latest version */
831 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
832 descr_buf, descr_idx);
833
834 /* Mark current descriptor as used/skipped */
835 vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
836
837 /* If this is what we are looking for - return */
838 if (descr_buf->btype == VMXNET3_RXD_BTYPE_HEAD) {
839 *ridx = RX_HEAD_BODY_RING;
840 return true;
841 }
842 }
843 }
844
845 static bool
846 vmxnet3_get_next_body_rx_descr(VMXNET3State *s,
847 struct Vmxnet3_RxDesc *d,
848 uint32_t *didx,
849 uint32_t *ridx)
850 {
851 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
852
853 /* Try to find corresponding descriptor in head/body ring */
854 if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) {
855 /* Only read after generation field verification */
856 smp_rmb();
857 /* Re-read to be sure we got the latest version */
858 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, d, didx);
859 if (d->btype == VMXNET3_RXD_BTYPE_BODY) {
860 vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
861 *ridx = RX_HEAD_BODY_RING;
862 return true;
863 }
864 }
865
866 /*
867 * If there are no free descriptors on the head/body ring, or the next
868 * free descriptor is a head descriptor, switch to the body-only ring
869 */
870 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
871
872 /* If no more free descriptors - return */
873 if (d->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
874 /* Only read after generation field verification */
875 smp_rmb();
876 /* Re-read to be sure we got the latest version */
877 vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, d, didx);
878 assert(d->btype == VMXNET3_RXD_BTYPE_BODY);
879 *ridx = RX_BODY_ONLY_RING;
880 vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
881 return true;
882 }
883
884 return false;
885 }
886
887 static inline bool
888 vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
889 struct Vmxnet3_RxDesc *descr_buf,
890 uint32_t *descr_idx,
891 uint32_t *ridx)
892 {
893 if (is_head || !s->rx_packets_compound) {
894 return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
895 } else {
896 return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
897 }
898 }
899
900 /* In case packet was csum offloaded (either NEEDS_CSUM or DATA_VALID),
901 * the implementation always passes an RxCompDesc with a "Checksum
902 * calculated and found correct" to the OS (cnc=0 and tuc=1, see
903 * vmxnet3_rx_update_descr). This emulates the observed ESXi behavior.
904 *
905 * Therefore, if packet has the NEEDS_CSUM set, we must calculate
906 * and place a fully computed checksum into the tcp/udp header.
907 * Otherwise, the OS driver will receive a checksum-correct indication
908 * (CHECKSUM_UNNECESSARY), but with the actual tcp/udp checksum field
909 * having just the pseudo header csum value.
910 *
911 * While this is not a problem if packet is destined for local delivery,
912 * in the case the host OS performs forwarding, it will forward an
913 * incorrectly checksummed packet.
914 */
915 static void vmxnet3_rx_need_csum_calculate(struct VmxnetRxPkt *pkt,
916 const void *pkt_data,
917 size_t pkt_len)
918 {
919 struct virtio_net_hdr *vhdr;
920 bool isip4, isip6, istcp, isudp;
921 uint8_t *data;
922 int len;
923
924 if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) {
925 return;
926 }
927
928 vhdr = vmxnet_rx_pkt_get_vhdr(pkt);
929 if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
930 return;
931 }
932
933 vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
934 if (!(isip4 || isip6) || !(istcp || isudp)) {
935 return;
936 }
937
938 vmxnet3_dump_virt_hdr(vhdr);
939
940 /* Validate packet len: csum_start + csum_offset + length of csum field */
941 if (pkt_len < (vhdr->csum_start + vhdr->csum_offset + 2)) {
942 VMW_PKPRN("packet len:%zu < csum_start(%d) + csum_offset(%d) + 2, "
943 "cannot calculate checksum",
944 pkt_len, vhdr->csum_start, vhdr->csum_offset);
945 return;
946 }
947
948 data = (uint8_t *)pkt_data + vhdr->csum_start;
949 len = pkt_len - vhdr->csum_start;
950 /* Put the checksum obtained into the packet */
951 stw_be_p(data + vhdr->csum_offset, net_raw_checksum(data, len));
952
953 vhdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
954 vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
955 }
956
957 static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt,
958 struct Vmxnet3_RxCompDesc *rxcd)
959 {
960 int csum_ok, is_gso;
961 bool isip4, isip6, istcp, isudp;
962 struct virtio_net_hdr *vhdr;
963 uint8_t offload_type;
964
965 if (vmxnet_rx_pkt_is_vlan_stripped(pkt)) {
966 rxcd->ts = 1;
967 rxcd->tci = vmxnet_rx_pkt_get_vlan_tag(pkt);
968 }
969
970 if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) {
971 goto nocsum;
972 }
973
974 vhdr = vmxnet_rx_pkt_get_vhdr(pkt);
975 /*
976 * Checksum is valid either when the lower level says so, or when the
977 * lower level requests checksum offload, meaning the packet was produced
978 * or bridged locally and has not travelled over the network since the
979 * last checksum calculation or production
980 */
981 csum_ok = VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
982 VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
983
984 offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
985 is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
986
987 if (!csum_ok && !is_gso) {
988 goto nocsum;
989 }
990
991 vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
992 if ((!istcp && !isudp) || (!isip4 && !isip6)) {
993 goto nocsum;
994 }
995
996 rxcd->cnc = 0;
997 rxcd->v4 = isip4 ? 1 : 0;
998 rxcd->v6 = isip6 ? 1 : 0;
999 rxcd->tcp = istcp ? 1 : 0;
1000 rxcd->udp = isudp ? 1 : 0;
1001 rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
1002 return;
1003
1004 nocsum:
1005 rxcd->cnc = 1;
1006 return;
1007 }
1008
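/*
 * Copy bytes_to_copy bytes out of the iovec, starting at logical offset
 * start_iov_off within it, into guest physical memory at target_addr.
 */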
1009 static void
1010 vmxnet3_physical_memory_writev(const struct iovec *iov,
1011 size_t start_iov_off,
1012 hwaddr target_addr,
1013 size_t bytes_to_copy)
1014 {
1015 size_t curr_off = 0;
1016 size_t copied = 0;
1017
1018 while (bytes_to_copy) {
1019 if (start_iov_off < (curr_off + iov->iov_len)) {
1020 size_t chunk_len =
1021 MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
1022
1023 cpu_physical_memory_write(target_addr + copied,
1024 iov->iov_base + start_iov_off - curr_off,
1025 chunk_len);
1026
1027 copied += chunk_len;
1028 start_iov_off += chunk_len;
1029 curr_off = start_iov_off;
1030 bytes_to_copy -= chunk_len;
1031 } else {
1032 curr_off += iov->iov_len;
1033 }
1034 iov++;
1035 }
1036 }
1037
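/*
 * Deliver the current rx_pkt to the guest: copy it into RX buffers taken
 * from the head/body rings, write one completion descriptor per buffer
 * used, and flag the last completion with EOP (and ERR on truncation).
 */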
1038 static bool
1039 vmxnet3_indicate_packet(VMXNET3State *s)
1040 {
1041 struct Vmxnet3_RxDesc rxd;
1042 bool is_head = true;
1043 uint32_t rxd_idx;
1044 uint32_t rx_ridx = 0;
1045
1046 struct Vmxnet3_RxCompDesc rxcd;
1047 uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
1048 hwaddr new_rxcd_pa = 0;
1049 hwaddr ready_rxcd_pa = 0;
1050 struct iovec *data = vmxnet_rx_pkt_get_iovec(s->rx_pkt);
1051 size_t bytes_copied = 0;
1052 size_t bytes_left = vmxnet_rx_pkt_get_total_len(s->rx_pkt);
1053 uint16_t num_frags = 0;
1054 size_t chunk_size;
1055
1056 vmxnet_rx_pkt_dump(s->rx_pkt);
1057
1058 while (bytes_left > 0) {
1059
1060 /* cannot add more frags to packet */
1061 if (num_frags == s->max_rx_frags) {
1062 break;
1063 }
1064
1065 new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen);
1066 if (!new_rxcd_pa) {
1067 break;
1068 }
1069
1070 if (!vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
1071 break;
1072 }
1073
1074 chunk_size = MIN(bytes_left, rxd.len);
1075 vmxnet3_physical_memory_writev(data, bytes_copied,
1076 le64_to_cpu(rxd.addr), chunk_size);
1077 bytes_copied += chunk_size;
1078 bytes_left -= chunk_size;
1079
1080 vmxnet3_dump_rx_descr(&rxd);
1081
1082 if (ready_rxcd_pa != 0) {
1083 cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd));
1084 }
1085
1086 memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
1087 rxcd.rxdIdx = rxd_idx;
1088 rxcd.len = chunk_size;
1089 rxcd.sop = is_head;
1090 rxcd.gen = new_rxcd_gen;
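/* rqID encodes the source ring: ring 0 buffers report IDs in
 * [0, rxq_num), ring 1 buffers report IDs in [rxq_num, 2 * rxq_num) */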
1091 rxcd.rqID = RXQ_IDX + rx_ridx * s->rxq_num;
1092
1093 if (bytes_left == 0) {
1094 vmxnet3_rx_update_descr(s->rx_pkt, &rxcd);
1095 }
1096
1097 VMW_RIPRN("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
1098 "sop %d csum_correct %lu",
1099 (unsigned long) rx_ridx,
1100 (unsigned long) rxcd.rxdIdx,
1101 (unsigned long) rxcd.len,
1102 (int) rxcd.sop,
1103 (unsigned long) rxcd.tuc);
1104
1105 is_head = false;
1106 ready_rxcd_pa = new_rxcd_pa;
1107 new_rxcd_pa = 0;
1108 num_frags++;
1109 }
1110
1111 if (ready_rxcd_pa != 0) {
1112 rxcd.eop = 1;
1113 rxcd.err = (bytes_left != 0);
1114 cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd));
1115
1116 /* Flush RX descriptor changes */
1117 smp_wmb();
1118 }
1119
1120 if (new_rxcd_pa != 0) {
1121 vmxnet3_revert_rxc_descr(s, RXQ_IDX);
1122 }
1123
1124 vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);
1125
1126 if (bytes_left == 0) {
1127 vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_OK);
1128 return true;
1129 } else if (num_frags == s->max_rx_frags) {
1130 vmxnet3_on_rx_done_update_stats(s, RXQ_IDX, VMXNET3_PKT_STATUS_ERROR);
1131 return false;
1132 } else {
1133 vmxnet3_on_rx_done_update_stats(s, RXQ_IDX,
1134 VMXNET3_PKT_STATUS_OUT_OF_BUF);
1135 return false;
1136 }
1137 }
1138
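/*
 * BAR0 holds the doorbell-style registers: TXPROD writes kick processing
 * of the corresponding TX queue, IMR writes update per-vector interrupt
 * masks, and RXPROD/RXPROD2 writes need no action since RX buffers are
 * consumed on demand.
 */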
1139 static void
1140 vmxnet3_io_bar0_write(void *opaque, hwaddr addr,
1141 uint64_t val, unsigned size)
1142 {
1143 VMXNET3State *s = opaque;
1144
1145 if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
1146 VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
1147 int tx_queue_idx =
1148 VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
1149 VMXNET3_REG_ALIGN);
1150 assert(tx_queue_idx <= s->txq_num);
1151 vmxnet3_process_tx_queue(s, tx_queue_idx);
1152 return;
1153 }
1154
1155 if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1156 VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1157 int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1158 VMXNET3_REG_ALIGN);
1159
1160 VMW_CBPRN("Interrupt mask for line %d written: 0x%" PRIx64, l, val);
1161
1162 vmxnet3_on_interrupt_mask_changed(s, l, val);
1163 return;
1164 }
1165
1166 if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
1167 VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
1168 VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
1169 VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
1170 return;
1171 }
1172
1173 VMW_WRPRN("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
1174 (uint64_t) addr, val, size);
1175 }
1176
1177 static uint64_t
1178 vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
1179 {
1180 VMXNET3State *s = opaque;
1181
1182 if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
1183 VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
1184 int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
1185 VMXNET3_REG_ALIGN);
1186 return s->interrupt_states[l].is_masked;
1187 }
1188
1189 VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
1190 return 0;
1191 }
1192
1193 static void vmxnet3_reset_interrupt_states(VMXNET3State *s)
1194 {
1195 int i;
1196 for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
1197 s->interrupt_states[i].is_asserted = false;
1198 s->interrupt_states[i].is_pending = false;
1199 s->interrupt_states[i].is_masked = true;
1200 }
1201 }
1202
1203 static void vmxnet3_reset_mac(VMXNET3State *s)
1204 {
1205 memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a));
1206 VMW_CFPRN("MAC address set to: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a));
1207 }
1208
1209 static void vmxnet3_deactivate_device(VMXNET3State *s)
1210 {
1211 if (s->device_active) {
1212 VMW_CBPRN("Deactivating vmxnet3...");
1213 vmxnet_tx_pkt_reset(s->tx_pkt);
1214 vmxnet_tx_pkt_uninit(s->tx_pkt);
1215 vmxnet_rx_pkt_uninit(s->rx_pkt);
1216 s->device_active = false;
1217 }
1218 }
1219
1220 static void vmxnet3_reset(VMXNET3State *s)
1221 {
1222 VMW_CBPRN("Resetting vmxnet3...");
1223
1224 vmxnet3_deactivate_device(s);
1225 vmxnet3_reset_interrupt_states(s);
1226 s->drv_shmem = 0;
1227 s->tx_sop = true;
1228 s->skip_current_tx_pkt = false;
1229 }
1230
1231 static void vmxnet3_update_rx_mode(VMXNET3State *s)
1232 {
1233 s->rx_mode = VMXNET3_READ_DRV_SHARED32(s->drv_shmem,
1234 devRead.rxFilterConf.rxMode);
1235 VMW_CFPRN("RX mode: 0x%08X", s->rx_mode);
1236 }
1237
1238 static void vmxnet3_update_vlan_filters(VMXNET3State *s)
1239 {
1240 int i;
1241
1242 /* Copy configuration from shared memory */
1243 VMXNET3_READ_DRV_SHARED(s->drv_shmem,
1244 devRead.rxFilterConf.vfTable,
1245 s->vlan_table,
1246 sizeof(s->vlan_table));
1247
1248 /* Invert byte order when needed */
1249 for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
1250 s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
1251 }
1252
1253 /* Dump configuration for debugging purposes */
1254 VMW_CFPRN("Configured VLANs:");
1255 for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
1256 if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
1257 VMW_CFPRN("\tVLAN %d is present", i);
1258 }
1259 }
1260 }
1261
1262 static void vmxnet3_update_mcast_filters(VMXNET3State *s)
1263 {
1264 uint16_t list_bytes =
1265 VMXNET3_READ_DRV_SHARED16(s->drv_shmem,
1266 devRead.rxFilterConf.mfTableLen);
1267
1268 s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
1269
1270 s->mcast_list = g_realloc(s->mcast_list, list_bytes);
1271 if (!s->mcast_list) {
1272 if (s->mcast_list_len == 0) {
1273 VMW_CFPRN("Current multicast list is empty");
1274 } else {
1275 VMW_ERPRN("Failed to allocate multicast list of %d elements",
1276 s->mcast_list_len);
1277 }
1278 s->mcast_list_len = 0;
1279 } else {
1280 int i;
1281 hwaddr mcast_list_pa =
1282 VMXNET3_READ_DRV_SHARED64(s->drv_shmem,
1283 devRead.rxFilterConf.mfTablePA);
1284
1285 cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes);
1286 VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len);
1287 for (i = 0; i < s->mcast_list_len; i++) {
1288 VMW_CFPRN("\t" VMXNET_MF, VMXNET_MA(s->mcast_list[i].a));
1289 }
1290 }
1291 }
1292
1293 static void vmxnet3_setup_rx_filtering(VMXNET3State *s)
1294 {
1295 vmxnet3_update_rx_mode(s);
1296 vmxnet3_update_vlan_filters(s);
1297 vmxnet3_update_mcast_filters(s);
1298 }
1299
1300 static uint32_t vmxnet3_get_interrupt_config(VMXNET3State *s)
1301 {
1302 uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
1303 VMW_CFPRN("Interrupt config is 0x%X", interrupt_mode);
1304 return interrupt_mode;
1305 }
1306
1307 static void vmxnet3_fill_stats(VMXNET3State *s)
1308 {
1309 int i;
1310
1311 if (!s->device_active)
1312 return;
1313
1314 for (i = 0; i < s->txq_num; i++) {
1315 cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa,
1316 &s->txq_descr[i].txq_stats,
1317 sizeof(s->txq_descr[i].txq_stats));
1318 }
1319
1320 for (i = 0; i < s->rxq_num; i++) {
1321 cpu_physical_memory_write(s->rxq_descr[i].rx_stats_pa,
1322 &s->rxq_descr[i].rxq_stats,
1323 sizeof(s->rxq_descr[i].rxq_stats));
1324 }
1325 }
1326
1327 static void vmxnet3_adjust_by_guest_type(VMXNET3State *s)
1328 {
1329 struct Vmxnet3_GOSInfo gos;
1330
1331 VMXNET3_READ_DRV_SHARED(s->drv_shmem, devRead.misc.driverInfo.gos,
1332 &gos, sizeof(gos));
1333 s->rx_packets_compound =
1334 (gos.gosType == VMXNET3_GOS_TYPE_WIN) ? false : true;
1335
1336 VMW_CFPRN("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
1337 }
1338
1339 static void
1340 vmxnet3_dump_conf_descr(const char *name,
1341 struct Vmxnet3_VariableLenConfDesc *pm_descr)
1342 {
1343 VMW_CFPRN("%s descriptor dump: Version %u, Length %u",
1344 name, pm_descr->confVer, pm_descr->confLen);
1345
1346 }
1347
1348 static void vmxnet3_update_pm_state(VMXNET3State *s)
1349 {
1350 struct Vmxnet3_VariableLenConfDesc pm_descr;
1351
1352 pm_descr.confLen =
1353 VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.pmConfDesc.confLen);
1354 pm_descr.confVer =
1355 VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.pmConfDesc.confVer);
1356 pm_descr.confPA =
1357 VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.pmConfDesc.confPA);
1358
1359 vmxnet3_dump_conf_descr("PM State", &pm_descr);
1360 }
1361
1362 static void vmxnet3_update_features(VMXNET3State *s)
1363 {
1364 uint32_t guest_features;
1365 int rxcso_supported;
1366
1367 guest_features = VMXNET3_READ_DRV_SHARED32(s->drv_shmem,
1368 devRead.misc.uptFeatures);
1369
1370 rxcso_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
1371 s->rx_vlan_stripping = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
1372 s->lro_supported = VMXNET_FLAG_IS_SET(guest_features, UPT1_F_LRO);
1373
1374 VMW_CFPRN("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
1375 s->lro_supported, rxcso_supported,
1376 s->rx_vlan_stripping);
1377 if (s->peer_has_vhdr) {
1378 qemu_set_offload(qemu_get_queue(s->nic)->peer,
1379 rxcso_supported,
1380 s->lro_supported,
1381 s->lro_supported,
1382 0,
1383 0);
1384 }
1385 }
1386
1387 static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
1388 {
1389 return s->msix_used || s->msi_used || (intx ==
1390 (pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1));
1391 }
1392
1393 static void vmxnet3_validate_interrupt_idx(bool is_msix, int idx)
1394 {
1395 int max_ints = is_msix ? VMXNET3_MAX_INTRS : VMXNET3_MAX_NMSIX_INTRS;
1396 if (idx >= max_ints) {
1397 hw_error("Bad interrupt index: %d\n", idx);
1398 }
1399 }
1400
1401 static void vmxnet3_validate_interrupts(VMXNET3State *s)
1402 {
1403 int i;
1404
1405 VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
1406 vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);
1407
1408 for (i = 0; i < s->txq_num; i++) {
1409 int idx = s->txq_descr[i].intr_idx;
1410 VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
1411 vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1412 }
1413
1414 for (i = 0; i < s->rxq_num; i++) {
1415 int idx = s->rxq_descr[i].intr_idx;
1416 VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
1417 vmxnet3_validate_interrupt_idx(s->msix_used, idx);
1418 }
1419 }
1420
1421 static void vmxnet3_validate_queues(VMXNET3State *s)
1422 {
1423 /*
1424 * txq_num and rxq_num are the total numbers of queues configured
1425 * by the guest. They must not exceed the corresponding
1426 * maximum values.
1427 */
1428
1429 if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) {
1430 hw_error("Bad TX queues number: %d\n", s->txq_num);
1431 }
1432
1433 if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) {
1434 hw_error("Bad RX queues number: %d\n", s->rxq_num);
1435 }
1436 }
1437
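/*
 * Parse the driver-shared memory the guest filled in: cache MTU, feature
 * and interrupt configuration, then initialize every TX/RX ring and
 * completion ring from the queue descriptor table before going active.
 */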
1438 static void vmxnet3_activate_device(VMXNET3State *s)
1439 {
1440 int i;
1441 static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
1442 hwaddr qdescr_table_pa;
1443 uint64_t pa;
1444 uint32_t size;
1445
1446 /* Verify configuration consistency */
1447 if (!vmxnet3_verify_driver_magic(s->drv_shmem)) {
1448 VMW_ERPRN("Device configuration received from driver is invalid");
1449 return;
1450 }
1451
1452 /* Verify if device is active */
1453 if (s->device_active) {
1454 VMW_CFPRN("Vmxnet3 device is active");
1455 return;
1456 }
1457
1458 vmxnet3_adjust_by_guest_type(s);
1459 vmxnet3_update_features(s);
1460 vmxnet3_update_pm_state(s);
1461 vmxnet3_setup_rx_filtering(s);
1462 /* Cache fields from shared memory */
1463 s->mtu = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.misc.mtu);
1464 VMW_CFPRN("MTU is %u", s->mtu);
1465
1466 s->max_rx_frags =
1467 VMXNET3_READ_DRV_SHARED16(s->drv_shmem, devRead.misc.maxNumRxSG);
1468
1469 if (s->max_rx_frags == 0) {
1470 s->max_rx_frags = 1;
1471 }
1472
1473 VMW_CFPRN("Max RX fragments is %u", s->max_rx_frags);
1474
1475 s->event_int_idx =
1476 VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.intrConf.eventIntrIdx);
1477 assert(vmxnet3_verify_intx(s, s->event_int_idx));
1478 VMW_CFPRN("Events interrupt line is %u", s->event_int_idx);
1479
1480 s->auto_int_masking =
1481 VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.intrConf.autoMask);
1482 VMW_CFPRN("Automatic interrupt masking is %d", (int)s->auto_int_masking);
1483
1484 s->txq_num =
1485 VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.misc.numTxQueues);
1486 s->rxq_num =
1487 VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.misc.numRxQueues);
1488
1489 VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
1490 vmxnet3_validate_queues(s);
1491
1492 qdescr_table_pa =
1493 VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.misc.queueDescPA);
1494 VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa);
1495
1496 /*
1497 * The worst case is a packet that occupies all TX ring space, so we use
1498 * the total size of all TX rings as the maximum number of TX fragments
1499 */
1500 s->max_tx_frags = 0;
1501
1502 /* TX queues */
1503 for (i = 0; i < s->txq_num; i++) {
1504 hwaddr qdescr_pa =
1505 qdescr_table_pa + i * sizeof(struct Vmxnet3_TxQueueDesc);
1506
1507 /* Read interrupt number for this TX queue */
1508 s->txq_descr[i].intr_idx =
1509 VMXNET3_READ_TX_QUEUE_DESCR8(qdescr_pa, conf.intrIdx);
1510 assert(vmxnet3_verify_intx(s, s->txq_descr[i].intr_idx));
1511
1512 VMW_CFPRN("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);
1513
1514 /* Read rings memory locations for TX queues */
1515 pa = VMXNET3_READ_TX_QUEUE_DESCR64(qdescr_pa, conf.txRingBasePA);
1516 size = VMXNET3_READ_TX_QUEUE_DESCR32(qdescr_pa, conf.txRingSize);
1517
1518 vmxnet3_ring_init(&s->txq_descr[i].tx_ring, pa, size,
1519 sizeof(struct Vmxnet3_TxDesc), false);
1520 VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring);
1521
1522 s->max_tx_frags += size;
1523
1524 /* TXC ring */
1525 pa = VMXNET3_READ_TX_QUEUE_DESCR64(qdescr_pa, conf.compRingBasePA);
1526 size = VMXNET3_READ_TX_QUEUE_DESCR32(qdescr_pa, conf.compRingSize);
1527 vmxnet3_ring_init(&s->txq_descr[i].comp_ring, pa, size,
1528 sizeof(struct Vmxnet3_TxCompDesc), true);
1529 VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);
1530
1531 s->txq_descr[i].tx_stats_pa =
1532 qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);
1533
1534 memset(&s->txq_descr[i].txq_stats, 0,
1535 sizeof(s->txq_descr[i].txq_stats));
1536
1537 /* Fill device-managed parameters for queues */
1538 VMXNET3_WRITE_TX_QUEUE_DESCR32(qdescr_pa,
1539 ctrl.txThreshold,
1540 VMXNET3_DEF_TX_THRESHOLD);
1541 }
1542
1543 /* Preallocate TX packet wrapper */
1544 VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
1545 vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr);
1546 vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
1547
1548 /* Read rings memory locations for RX queues */
1549 for (i = 0; i < s->rxq_num; i++) {
1550 int j;
1551 hwaddr qd_pa =
1552 qdescr_table_pa + s->txq_num * sizeof(struct Vmxnet3_TxQueueDesc) +
1553 i * sizeof(struct Vmxnet3_RxQueueDesc);
1554
1555 /* Read interrupt number for this RX queue */
1556 s->rxq_descr[i].intr_idx =
1557 VMXNET3_READ_TX_QUEUE_DESCR8(qd_pa, conf.intrIdx);
1558 assert(vmxnet3_verify_intx(s, s->rxq_descr[i].intr_idx));
1559
1560 VMW_CFPRN("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);
1561
1562 /* Read rings memory locations */
1563 for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
1564 /* RX rings */
1565 pa = VMXNET3_READ_RX_QUEUE_DESCR64(qd_pa, conf.rxRingBasePA[j]);
1566 size = VMXNET3_READ_RX_QUEUE_DESCR32(qd_pa, conf.rxRingSize[j]);
1567 vmxnet3_ring_init(&s->rxq_descr[i].rx_ring[j], pa, size,
1568 sizeof(struct Vmxnet3_RxDesc), false);
1569 VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
1570 i, j, pa, size);
1571 }
1572
1573 /* RXC ring */
1574 pa = VMXNET3_READ_RX_QUEUE_DESCR64(qd_pa, conf.compRingBasePA);
1575 size = VMXNET3_READ_RX_QUEUE_DESCR32(qd_pa, conf.compRingSize);
1576 vmxnet3_ring_init(&s->rxq_descr[i].comp_ring, pa, size,
1577 sizeof(struct Vmxnet3_RxCompDesc), true);
1578 VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);
1579
1580 s->rxq_descr[i].rx_stats_pa =
1581 qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
1582 memset(&s->rxq_descr[i].rxq_stats, 0,
1583 sizeof(s->rxq_descr[i].rxq_stats));
1584 }
1585
1586 vmxnet3_validate_interrupts(s);
1587
1588 /* Make sure everything is in place before device activation */
1589 smp_wmb();
1590
1591 vmxnet3_reset_mac(s);
1592
1593 s->device_active = true;
1594 }
1595
1596 static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd)
1597 {
1598 s->last_command = cmd;
1599
1600 switch (cmd) {
1601 case VMXNET3_CMD_GET_PERM_MAC_HI:
1602 VMW_CBPRN("Set: Get upper part of permanent MAC");
1603 break;
1604
1605 case VMXNET3_CMD_GET_PERM_MAC_LO:
1606 VMW_CBPRN("Set: Get lower part of permanent MAC");
1607 break;
1608
1609 case VMXNET3_CMD_GET_STATS:
1610 VMW_CBPRN("Set: Get device statistics");
1611 vmxnet3_fill_stats(s);
1612 break;
1613
1614 case VMXNET3_CMD_ACTIVATE_DEV:
1615 VMW_CBPRN("Set: Activating vmxnet3 device");
1616 vmxnet3_activate_device(s);
1617 break;
1618
1619 case VMXNET3_CMD_UPDATE_RX_MODE:
1620 VMW_CBPRN("Set: Update rx mode");
1621 vmxnet3_update_rx_mode(s);
1622 break;
1623
1624 case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
1625 VMW_CBPRN("Set: Update VLAN filters");
1626 vmxnet3_update_vlan_filters(s);
1627 break;
1628
1629 case VMXNET3_CMD_UPDATE_MAC_FILTERS:
1630 VMW_CBPRN("Set: Update MAC filters");
1631 vmxnet3_update_mcast_filters(s);
1632 break;
1633
1634 case VMXNET3_CMD_UPDATE_FEATURE:
1635 VMW_CBPRN("Set: Update features");
1636 vmxnet3_update_features(s);
1637 break;
1638
1639 case VMXNET3_CMD_UPDATE_PMCFG:
1640 VMW_CBPRN("Set: Update power management config");
1641 vmxnet3_update_pm_state(s);
1642 break;
1643
1644 case VMXNET3_CMD_GET_LINK:
1645 VMW_CBPRN("Set: Get link");
1646 break;
1647
1648 case VMXNET3_CMD_RESET_DEV:
1649 VMW_CBPRN("Set: Reset device");
1650 vmxnet3_reset(s);
1651 break;
1652
1653 case VMXNET3_CMD_QUIESCE_DEV:
1654 VMW_CBPRN("Set: VMXNET3_CMD_QUIESCE_DEV - deactivate the device");
1655 vmxnet3_deactivate_device(s);
1656 break;
1657
1658 case VMXNET3_CMD_GET_CONF_INTR:
1659 VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
1660 break;
1661
1662 case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1663 VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
1664 "adaptive ring info flags");
1665 break;
1666
1667 case VMXNET3_CMD_GET_DID_LO:
1668 VMW_CBPRN("Set: Get lower part of device ID");
1669 break;
1670
1671 case VMXNET3_CMD_GET_DID_HI:
1672 VMW_CBPRN("Set: Get upper part of device ID");
1673 break;
1674
1675 case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1676 VMW_CBPRN("Set: Get device extra info");
1677 break;
1678
1679 default:
1680 VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
1681 break;
1682 }
1683 }
1684
1685 static uint64_t vmxnet3_get_command_status(VMXNET3State *s)
1686 {
1687 uint64_t ret;
1688
1689 switch (s->last_command) {
1690 case VMXNET3_CMD_ACTIVATE_DEV:
1691 ret = (s->device_active) ? 0 : 1;
1692 VMW_CFPRN("Device active: %" PRIx64, ret);
1693 break;
1694
1695 case VMXNET3_CMD_RESET_DEV:
1696 case VMXNET3_CMD_QUIESCE_DEV:
1697 case VMXNET3_CMD_GET_QUEUE_STATUS:
1698 case VMXNET3_CMD_GET_DEV_EXTRA_INFO:
1699 ret = 0;
1700 break;
1701
1702 case VMXNET3_CMD_GET_LINK:
1703 ret = s->link_status_and_speed;
1704 VMW_CFPRN("Link and speed: %" PRIx64, ret);
1705 break;
1706
1707 case VMXNET3_CMD_GET_PERM_MAC_LO:
1708 ret = vmxnet3_get_mac_low(&s->perm_mac);
1709 break;
1710
1711 case VMXNET3_CMD_GET_PERM_MAC_HI:
1712 ret = vmxnet3_get_mac_high(&s->perm_mac);
1713 break;
1714
1715 case VMXNET3_CMD_GET_CONF_INTR:
1716 ret = vmxnet3_get_interrupt_config(s);
1717 break;
1718
1719 case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
1720 ret = VMXNET3_DISABLE_ADAPTIVE_RING;
1721 break;
1722
1723 case VMXNET3_CMD_GET_DID_LO:
1724 ret = PCI_DEVICE_ID_VMWARE_VMXNET3;
1725 break;
1726
1727 case VMXNET3_CMD_GET_DID_HI:
1728 ret = VMXNET3_DEVICE_REVISION;
1729 break;
1730
1731 default:
1732 VMW_WRPRN("Received request for unknown command: %x", s->last_command);
1733 ret = 0;
1734 break;
1735 }
1736
1737 return ret;
1738 }
1739
1740 static void vmxnet3_set_events(VMXNET3State *s, uint32_t val)
1741 {
1742 uint32_t events;
1743
1744 VMW_CBPRN("Setting events: 0x%x", val);
1745 events = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, ecr) | val;
1746 VMXNET3_WRITE_DRV_SHARED32(s->drv_shmem, ecr, events);
1747 }
1748
1749 static void vmxnet3_ack_events(VMXNET3State *s, uint32_t val)
1750 {
1751 uint32_t events;
1752
1753 VMW_CBPRN("Clearing events: 0x%x", val);
1754 events = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, ecr) & ~val;
1755 VMXNET3_WRITE_DRV_SHARED32(s->drv_shmem, ecr, events);
1756 }
1757
1758 static void
1759 vmxnet3_io_bar1_write(void *opaque,
1760 hwaddr addr,
1761 uint64_t val,
1762 unsigned size)
1763 {
1764 VMXNET3State *s = opaque;
1765
1766 switch (addr) {
1767 /* Vmxnet3 Revision Report Selection */
1768 case VMXNET3_REG_VRRS:
1769 VMW_CBPRN("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
1770 val, size);
1771 break;
1772
1773 /* UPT Version Report Selection */
1774 case VMXNET3_REG_UVRS:
1775 VMW_CBPRN("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
1776 val, size);
1777 break;
1778
1779 /* Driver Shared Address Low */
1780 case VMXNET3_REG_DSAL:
1781 VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
1782 val, size);
1783 /*
1784 * Guest driver will first write the low part of the shared
1785 * memory address. We save it to temp variable and set the
1786 * shared address only after we get the high part
1787 */
1788 if (val == 0) {
1789 vmxnet3_deactivate_device(s);
1790 }
1791 s->temp_shared_guest_driver_memory = val;
1792 s->drv_shmem = 0;
1793 break;
1794
1795 /* Driver Shared Address High */
1796 case VMXNET3_REG_DSAH:
1797 VMW_CBPRN("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
1798 val, size);
1799 /*
1800 * Set the shared memory between guest driver and device.
1801 * We already should have low address part.
1802 */
1803 s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
1804 break;
1805
1806 /* Command */
1807 case VMXNET3_REG_CMD:
1808 VMW_CBPRN("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
1809 val, size);
1810 vmxnet3_handle_command(s, val);
1811 break;
1812
1813 /* MAC Address Low */
1814 case VMXNET3_REG_MACL:
1815 VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
1816 val, size);
1817 s->temp_mac = val;
1818 break;
1819
1820 /* MAC Address High */
1821 case VMXNET3_REG_MACH:
1822 VMW_CBPRN("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
1823 val, size);
1824 vmxnet3_set_variable_mac(s, val, s->temp_mac);
1825 break;
1826
1827 /* Interrupt Cause Register */
1828 case VMXNET3_REG_ICR:
1829 VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
1830 val, size);
1831 g_assert_not_reached();
1832 break;
1833
1834 /* Event Cause Register */
1835 case VMXNET3_REG_ECR:
1836 VMW_CBPRN("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
1837 val, size);
1838 vmxnet3_ack_events(s, val);
1839 break;
1840
1841 default:
1842 VMW_CBPRN("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
1843 addr, val, size);
1844 break;
1845 }
1846 }
1847
1848 static uint64_t
1849 vmxnet3_io_bar1_read(void *opaque, hwaddr addr, unsigned size)
1850 {
1851 VMXNET3State *s = opaque;
1852 uint64_t ret = 0;
1853
1854 switch (addr) {
1855 /* Vmxnet3 Revision Report Selection */
1856 case VMXNET3_REG_VRRS:
1857 VMW_CBPRN("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
1858 ret = VMXNET3_DEVICE_REVISION;
1859 break;
1860
1861 /* UPT Version Report Selection */
1862 case VMXNET3_REG_UVRS:
1863 VMW_CBPRN("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
1864 ret = VMXNET3_UPT_REVISION;
1865 break;
1866
1867 /* Command */
1868 case VMXNET3_REG_CMD:
1869 VMW_CBPRN("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
1870 ret = vmxnet3_get_command_status(s);
1871 break;
1872
1873 /* MAC Address Low */
1874 case VMXNET3_REG_MACL:
1875 VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
1876 ret = vmxnet3_get_mac_low(&s->conf.macaddr);
1877 break;
1878
1879 /* MAC Address High */
1880 case VMXNET3_REG_MACH:
1881 VMW_CBPRN("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
1882 ret = vmxnet3_get_mac_high(&s->conf.macaddr);
1883 break;
1884
1885 /*
1886 * Interrupt Cause Register
1887 * Used for legacy interrupts only, so the interrupt index is always 0
1888 */
1889 case VMXNET3_REG_ICR:
1890 VMW_CBPRN("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
1891 if (vmxnet3_interrupt_asserted(s, 0)) {
1892 vmxnet3_clear_interrupt(s, 0);
1893 ret = true;
1894 } else {
1895 ret = false;
1896 }
1897 break;
1898
1899 default:
1900 VMW_CBPRN("Unknown read BAR1[%" PRIx64 "], %d bytes", addr, size);
1901 break;
1902 }
1903
1904 return ret;
1905 }
1906
1907 static int
1908 vmxnet3_can_receive(NetClientState *nc)
1909 {
1910 VMXNET3State *s = qemu_get_nic_opaque(nc);
1911 return s->device_active &&
1912 VMXNET_FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
1913 }
1914
1915 static inline bool
1916 vmxnet3_is_registered_vlan(VMXNET3State *s, const void *data)
1917 {
1918 uint16_t vlan_tag = eth_get_pkt_tci(data) & VLAN_VID_MASK;
1919 if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
1920 return true;
1921 }
1922
1923 return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
1924 }
1925
1926 static bool
1927 vmxnet3_is_allowed_mcast_group(VMXNET3State *s, const uint8_t *group_mac)
1928 {
1929 int i;
1930 for (i = 0; i < s->mcast_list_len; i++) {
1931 if (!memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
1932 return true;
1933 }
1934 }
1935 return false;
1936 }
1937
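/*
 * Apply the configured RX mode (promiscuous mode, VLAN table,
 * unicast/broadcast/multicast filters) to decide whether a frame
 * may be indicated to the guest.
 */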
1938 static bool
1939 vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
1940 size_t size)
1941 {
1942 struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
1943
1944 if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
1945 return true;
1946 }
1947
1948 if (!vmxnet3_is_registered_vlan(s, data)) {
1949 return false;
1950 }
1951
1952 switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) {
1953 case ETH_PKT_UCAST:
1954 if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
1955 return false;
1956 }
1957 if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
1958 return false;
1959 }
1960 break;
1961
1962 case ETH_PKT_BCAST:
1963 if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
1964 return false;
1965 }
1966 break;
1967
1968 case ETH_PKT_MCAST:
1969 if (VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
1970 return true;
1971 }
1972 if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
1973 return false;
1974 }
1975 if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
1976 return false;
1977 }
1978 break;
1979
1980 default:
1981 g_assert_not_reached();
1982 }
1983
1984 return true;
1985 }
1986
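/*
 * Receive path: strip the virtio-net header if the peer supplies one,
 * pad short frames to the minimum Ethernet length, run the RX filter
 * and indicate the packet to the guest RX rings.
 */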
1987 static ssize_t
1988 vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1989 {
1990 VMXNET3State *s = qemu_get_nic_opaque(nc);
1991 size_t bytes_indicated;
1992 uint8_t min_buf[MIN_BUF_SIZE];
1993
1994 if (!vmxnet3_can_receive(nc)) {
1995 VMW_PKPRN("Cannot receive now");
1996 return -1;
1997 }
1998
1999 if (s->peer_has_vhdr) {
2000 vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
2001 buf += sizeof(struct virtio_net_hdr);
2002 size -= sizeof(struct virtio_net_hdr);
2003 }
2004
2005 /* Pad to minimum Ethernet frame length */
2006 if (size < sizeof(min_buf)) {
2007 memcpy(min_buf, buf, size);
2008 memset(&min_buf[size], 0, sizeof(min_buf) - size);
2009 buf = min_buf;
2010 size = sizeof(min_buf);
2011 }
2012
2013 vmxnet_rx_pkt_set_packet_type(s->rx_pkt,
2014 get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
2015
2016 if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
2017 vmxnet_rx_pkt_set_protocols(s->rx_pkt, buf, size);
2018 vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
2019 vmxnet_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
2020 bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
2021 if (bytes_indicated < size) {
2022 VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size);
2023 }
2024 } else {
2025 VMW_PKPRN("Packet dropped by RX filter");
2026 bytes_indicated = size;
2027 }
2028
2029 assert(size > 0);
2030 assert(bytes_indicated != 0);
2031 return bytes_indicated;
2032 }
2033
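/* Mirror the peer's link state into the status register and raise a link event */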
2034 static void vmxnet3_set_link_status(NetClientState *nc)
2035 {
2036 VMXNET3State *s = qemu_get_nic_opaque(nc);
2037
2038 if (nc->link_down) {
2039 s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
2040 } else {
2041 s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
2042 }
2043
2044 vmxnet3_set_events(s, VMXNET3_ECR_LINK);
2045 vmxnet3_trigger_interrupt(s, s->event_int_idx);
2046 }
2047
2048 static NetClientInfo net_vmxnet3_info = {
2049 .type = NET_CLIENT_OPTIONS_KIND_NIC,
2050 .size = sizeof(NICState),
2051 .receive = vmxnet3_receive,
2052 .link_status_changed = vmxnet3_set_link_status,
2053 };
2054
2055 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
2056 {
2057 NetClientState *nc = qemu_get_queue(s->nic);
2058
2059 if (qemu_has_vnet_hdr(nc->peer)) {
2060 return true;
2061 }
2062
2063 return false;
2064 }
2065
2066 static void vmxnet3_net_uninit(VMXNET3State *s)
2067 {
2068 g_free(s->mcast_list);
2069 vmxnet3_deactivate_device(s);
2070 qemu_del_nic(s->nic);
2071 }
2072
2073 static void vmxnet3_net_init(VMXNET3State *s)
2074 {
2075 DeviceState *d = DEVICE(s);
2076
2077 VMW_CBPRN("vmxnet3_net_init called...");
2078
2079 qemu_macaddr_default_if_unset(&s->conf.macaddr);
2080
2081 /* Windows guests will query the permanent MAC address that was set on init */
2082 memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
2083
2084 s->mcast_list = NULL;
2085 s->mcast_list_len = 0;
2086
2087 s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
2088
2089 VMW_CFPRN("Permanent MAC: " VMXNET_MF, VMXNET_MA(s->perm_mac.a));
2090
2091 s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
2092 object_get_typename(OBJECT(s)),
2093 d->id, s);
2094
2095 s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
2096 s->tx_sop = true;
2097 s->skip_current_tx_pkt = false;
2098 s->tx_pkt = NULL;
2099 s->rx_pkt = NULL;
2100 s->rx_vlan_stripping = false;
2101 s->lro_supported = false;
2102
2103 if (s->peer_has_vhdr) {
2104 qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
2105 sizeof(struct virtio_net_hdr));
2106
2107 qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
2108 }
2109
2110 qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
2111 }
2112
2113 static void
2114 vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
2115 {
2116 PCIDevice *d = PCI_DEVICE(s);
2117 int i;
2118 for (i = 0; i < num_vectors; i++) {
2119 msix_vector_unuse(d, i);
2120 }
2121 }
2122
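/* Claim the first num_vectors MSI-X vectors; on error, release what was claimed */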
2123 static bool
2124 vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
2125 {
2126 PCIDevice *d = PCI_DEVICE(s);
2127 int i;
2128 for (i = 0; i < num_vectors; i++) {
2129 int res = msix_vector_use(d, i);
2130 if (res < 0) {
2131 VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res);
2132 vmxnet3_unuse_msix_vectors(s, i);
2133 return false;
2134 }
2135 }
2136 return true;
2137 }
2138
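/*
 * The MSI-X table and PBA both live in the dedicated MSI-X BAR. If either
 * capability initialization or vector allocation fails, the device simply
 * continues without MSI-X (msix_used stays false).
 */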
2139 static bool
2140 vmxnet3_init_msix(VMXNET3State *s)
2141 {
2142 PCIDevice *d = PCI_DEVICE(s);
2143 int res = msix_init(d, VMXNET3_MAX_INTRS,
2144 &s->msix_bar,
2145 VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
2146 &s->msix_bar,
2147 VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
2148 VMXNET3_MSIX_OFFSET(s));
2149
2150 if (res < 0) {
2151 VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
2152 s->msix_used = false;
2153 } else {
2154 if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2155 VMW_WRPRN("Failed to use MSI-X vectors, error %d", res);
2156 msix_uninit(d, &s->msix_bar, &s->msix_bar);
2157 s->msix_used = false;
2158 } else {
2159 s->msix_used = true;
2160 }
2161 }
2162 return s->msix_used;
2163 }
2164
2165 static void
2166 vmxnet3_cleanup_msix(VMXNET3State *s)
2167 {
2168 PCIDevice *d = PCI_DEVICE(s);
2169
2170 if (s->msix_used) {
2171 vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
2172 msix_uninit(d, &s->msix_bar, &s->msix_bar);
2173 }
2174 }
2175
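/* MSI capability parameters: 64-bit message addresses, no per-vector masking */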
2176 #define VMXNET3_USE_64BIT (true)
2177 #define VMXNET3_PER_VECTOR_MASK (false)
2178
2179 static bool
2180 vmxnet3_init_msi(VMXNET3State *s)
2181 {
2182 PCIDevice *d = PCI_DEVICE(s);
2183 int res;
2184
2185 res = msi_init(d, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
2186 VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK);
2187 if (res < 0) {
2188 VMW_WRPRN("Failed to initialize MSI, error %d", res);
2189 s->msi_used = false;
2190 } else {
2191 s->msi_used = true;
2192 }
2193
2194 return s->msi_used;
2195 }
2196
2197 static void
2198 vmxnet3_cleanup_msi(VMXNET3State *s)
2199 {
2200 PCIDevice *d = PCI_DEVICE(s);
2201
2202 if (s->msi_used) {
2203 msi_uninit(d);
2204 }
2205 }
2206
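/*
 * MSI-X table state is migrated through a separate "vmxnet3-msix" savevm
 * section registered in vmxnet3_pci_realize().
 */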
2207 static void
2208 vmxnet3_msix_save(QEMUFile *f, void *opaque)
2209 {
2210 PCIDevice *d = PCI_DEVICE(opaque);
2211 msix_save(d, f);
2212 }
2213
2214 static int
2215 vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id)
2216 {
2217 PCIDevice *d = PCI_DEVICE(opaque);
2218 msix_load(d, f);
2219 return 0;
2220 }
2221
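/* BAR0 and BAR1 registers are implemented as 32-bit little-endian accesses */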
2222 static const MemoryRegionOps b0_ops = {
2223 .read = vmxnet3_io_bar0_read,
2224 .write = vmxnet3_io_bar0_write,
2225 .endianness = DEVICE_LITTLE_ENDIAN,
2226 .impl = {
2227 .min_access_size = 4,
2228 .max_access_size = 4,
2229 },
2230 };
2231
2232 static const MemoryRegionOps b1_ops = {
2233 .read = vmxnet3_io_bar1_read,
2234 .write = vmxnet3_io_bar1_write,
2235 .endianness = DEVICE_LITTLE_ENDIAN,
2236 .impl = {
2237 .min_access_size = 4,
2238 .max_access_size = 4,
2239 },
2240 };
2241
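/*
 * Realize: map the BAR0 and BAR1 register regions plus the MSI-X BAR, wire up
 * legacy INTA, try MSI-X then MSI, create the NIC backend and register the
 * MSI-X migration section.
 */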
2242 static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
2243 {
2244 DeviceState *dev = DEVICE(pci_dev);
2245 VMXNET3State *s = VMXNET3(pci_dev);
2246
2247 VMW_CBPRN("Starting init...");
2248
2249 memory_region_init_io(&s->bar0, OBJECT(s), &b0_ops, s,
2250 "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
2251 pci_register_bar(pci_dev, VMXNET3_BAR0_IDX,
2252 PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
2253
2254 memory_region_init_io(&s->bar1, OBJECT(s), &b1_ops, s,
2255 "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
2256 pci_register_bar(pci_dev, VMXNET3_BAR1_IDX,
2257 PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
2258
2259 memory_region_init(&s->msix_bar, OBJECT(s), "vmxnet3-msix-bar",
2260 VMXNET3_MSIX_BAR_SIZE);
2261 pci_register_bar(pci_dev, VMXNET3_MSIX_BAR_IDX,
2262 PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
2263
2264 vmxnet3_reset_interrupt_states(s);
2265
2266 /* Interrupt pin A */
2267 pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
2268
2269 if (!vmxnet3_init_msix(s)) {
2270 VMW_WRPRN("Failed to initialize MSI-X, configuration is inconsistent.");
2271 }
2272
2273 if (!vmxnet3_init_msi(s)) {
2274 VMW_WRPRN("Failed to initialize MSI, configuration is inconsistent.");
2275 }
2276
2277 vmxnet3_net_init(s);
2278
2279 register_savevm(dev, "vmxnet3-msix", -1, 1,
2280 vmxnet3_msix_save, vmxnet3_msix_load, s);
2281 }
2282
2283 static void vmxnet3_instance_init(Object *obj)
2284 {
2285 VMXNET3State *s = VMXNET3(obj);
2286 device_add_bootindex_property(obj, &s->conf.bootindex,
2287 "bootindex", "/ethernet-phy@0",
2288 DEVICE(obj), NULL);
2289 }
2290
2291 static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
2292 {
2293 DeviceState *dev = DEVICE(pci_dev);
2294 VMXNET3State *s = VMXNET3(pci_dev);
2295
2296 VMW_CBPRN("Starting uninit...");
2297
2298 unregister_savevm(dev, "vmxnet3-msix", s);
2299
2300 vmxnet3_net_uninit(s);
2301
2302 vmxnet3_cleanup_msix(s);
2303
2304 vmxnet3_cleanup_msi(s);
2305 }
2306
2307 static void vmxnet3_qdev_reset(DeviceState *dev)
2308 {
2309 PCIDevice *d = PCI_DEVICE(dev);
2310 VMXNET3State *s = VMXNET3(d);
2311
2312 VMW_CBPRN("Starting QDEV reset...");
2313 vmxnet3_reset(s);
2314 }
2315
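/* The multicast-list subsection is always included in the migration stream */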
2316 static bool vmxnet3_mc_list_needed(void *opaque)
2317 {
2318 return true;
2319 }
2320
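/*
 * Allocate the destination buffer for the multicast list before the
 * subsection payload is loaded; mcast_list_buff_size arrives with the main
 * section and is recomputed in vmxnet3_pre_save() on the source side.
 */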
2321 static int vmxnet3_mcast_list_pre_load(void *opaque)
2322 {
2323 VMXNET3State *s = opaque;
2324
2325 s->mcast_list = g_malloc(s->mcast_list_buff_size);
2326
2327 return 0;
2328 }
2329
2331 static void vmxnet3_pre_save(void *opaque)
2332 {
2333 VMXNET3State *s = opaque;
2334
2335 s->mcast_list_buff_size = s->mcast_list_len * sizeof(MACAddr);
2336 }
2337
2338 static const VMStateDescription vmstate_vmxnet3_mcast_list = {
2339 .name = "vmxnet3/mcast_list",
2340 .version_id = 1,
2341 .minimum_version_id = 1,
2342 .pre_load = vmxnet3_mcast_list_pre_load,
2343 .needed = vmxnet3_mc_list_needed,
2344 .fields = (VMStateField[]) {
2345 VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
2346 mcast_list_buff_size),
2347 VMSTATE_END_OF_LIST()
2348 }
2349 };
2350
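/*
 * Hand-rolled (de)serialization helpers for rings, queue descriptors and
 * interrupt state; field order must stay in sync between each get/put pair.
 */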
2351 static void vmxnet3_get_ring_from_file(QEMUFile *f, Vmxnet3Ring *r)
2352 {
2353 r->pa = qemu_get_be64(f);
2354 r->size = qemu_get_be32(f);
2355 r->cell_size = qemu_get_be32(f);
2356 r->next = qemu_get_be32(f);
2357 r->gen = qemu_get_byte(f);
2358 }
2359
2360 static void vmxnet3_put_ring_to_file(QEMUFile *f, Vmxnet3Ring *r)
2361 {
2362 qemu_put_be64(f, r->pa);
2363 qemu_put_be32(f, r->size);
2364 qemu_put_be32(f, r->cell_size);
2365 qemu_put_be32(f, r->next);
2366 qemu_put_byte(f, r->gen);
2367 }
2368
2369 static void vmxnet3_get_tx_stats_from_file(QEMUFile *f,
2370 struct UPT1_TxStats *tx_stat)
2371 {
2372 tx_stat->TSOPktsTxOK = qemu_get_be64(f);
2373 tx_stat->TSOBytesTxOK = qemu_get_be64(f);
2374 tx_stat->ucastPktsTxOK = qemu_get_be64(f);
2375 tx_stat->ucastBytesTxOK = qemu_get_be64(f);
2376 tx_stat->mcastPktsTxOK = qemu_get_be64(f);
2377 tx_stat->mcastBytesTxOK = qemu_get_be64(f);
2378 tx_stat->bcastPktsTxOK = qemu_get_be64(f);
2379 tx_stat->bcastBytesTxOK = qemu_get_be64(f);
2380 tx_stat->pktsTxError = qemu_get_be64(f);
2381 tx_stat->pktsTxDiscard = qemu_get_be64(f);
2382 }
2383
2384 static void vmxnet3_put_tx_stats_to_file(QEMUFile *f,
2385 struct UPT1_TxStats *tx_stat)
2386 {
2387 qemu_put_be64(f, tx_stat->TSOPktsTxOK);
2388 qemu_put_be64(f, tx_stat->TSOBytesTxOK);
2389 qemu_put_be64(f, tx_stat->ucastPktsTxOK);
2390 qemu_put_be64(f, tx_stat->ucastBytesTxOK);
2391 qemu_put_be64(f, tx_stat->mcastPktsTxOK);
2392 qemu_put_be64(f, tx_stat->mcastBytesTxOK);
2393 qemu_put_be64(f, tx_stat->bcastPktsTxOK);
2394 qemu_put_be64(f, tx_stat->bcastBytesTxOK);
2395 qemu_put_be64(f, tx_stat->pktsTxError);
2396 qemu_put_be64(f, tx_stat->pktsTxDiscard);
2397 }
2398
2399 static int vmxnet3_get_txq_descr(QEMUFile *f, void *pv, size_t size)
2400 {
2401 Vmxnet3TxqDescr *r = pv;
2402
2403 vmxnet3_get_ring_from_file(f, &r->tx_ring);
2404 vmxnet3_get_ring_from_file(f, &r->comp_ring);
2405 r->intr_idx = qemu_get_byte(f);
2406 r->tx_stats_pa = qemu_get_be64(f);
2407
2408 vmxnet3_get_tx_stats_from_file(f, &r->txq_stats);
2409
2410 return 0;
2411 }
2412
2413 static void vmxnet3_put_txq_descr(QEMUFile *f, void *pv, size_t size)
2414 {
2415 Vmxnet3TxqDescr *r = pv;
2416
2417 vmxnet3_put_ring_to_file(f, &r->tx_ring);
2418 vmxnet3_put_ring_to_file(f, &r->comp_ring);
2419 qemu_put_byte(f, r->intr_idx);
2420 qemu_put_be64(f, r->tx_stats_pa);
2421 vmxnet3_put_tx_stats_to_file(f, &r->txq_stats);
2422 }
2423
2424 static const VMStateInfo txq_descr_info = {
2425 .name = "txq_descr",
2426 .get = vmxnet3_get_txq_descr,
2427 .put = vmxnet3_put_txq_descr
2428 };
2429
2430 static void vmxnet3_get_rx_stats_from_file(QEMUFile *f,
2431 struct UPT1_RxStats *rx_stat)
2432 {
2433 rx_stat->LROPktsRxOK = qemu_get_be64(f);
2434 rx_stat->LROBytesRxOK = qemu_get_be64(f);
2435 rx_stat->ucastPktsRxOK = qemu_get_be64(f);
2436 rx_stat->ucastBytesRxOK = qemu_get_be64(f);
2437 rx_stat->mcastPktsRxOK = qemu_get_be64(f);
2438 rx_stat->mcastBytesRxOK = qemu_get_be64(f);
2439 rx_stat->bcastPktsRxOK = qemu_get_be64(f);
2440 rx_stat->bcastBytesRxOK = qemu_get_be64(f);
2441 rx_stat->pktsRxOutOfBuf = qemu_get_be64(f);
2442 rx_stat->pktsRxError = qemu_get_be64(f);
2443 }
2444
2445 static void vmxnet3_put_rx_stats_to_file(QEMUFile *f,
2446 struct UPT1_RxStats *rx_stat)
2447 {
2448 qemu_put_be64(f, rx_stat->LROPktsRxOK);
2449 qemu_put_be64(f, rx_stat->LROBytesRxOK);
2450 qemu_put_be64(f, rx_stat->ucastPktsRxOK);
2451 qemu_put_be64(f, rx_stat->ucastBytesRxOK);
2452 qemu_put_be64(f, rx_stat->mcastPktsRxOK);
2453 qemu_put_be64(f, rx_stat->mcastBytesRxOK);
2454 qemu_put_be64(f, rx_stat->bcastPktsRxOK);
2455 qemu_put_be64(f, rx_stat->bcastBytesRxOK);
2456 qemu_put_be64(f, rx_stat->pktsRxOutOfBuf);
2457 qemu_put_be64(f, rx_stat->pktsRxError);
2458 }
2459
2460 static int vmxnet3_get_rxq_descr(QEMUFile *f, void *pv, size_t size)
2461 {
2462 Vmxnet3RxqDescr *r = pv;
2463 int i;
2464
2465 for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
2466 vmxnet3_get_ring_from_file(f, &r->rx_ring[i]);
2467 }
2468
2469 vmxnet3_get_ring_from_file(f, &r->comp_ring);
2470 r->intr_idx = qemu_get_byte(f);
2471 r->rx_stats_pa = qemu_get_be64(f);
2472
2473 vmxnet3_get_rx_stats_from_file(f, &r->rxq_stats);
2474
2475 return 0;
2476 }
2477
2478 static void vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size)
2479 {
2480 Vmxnet3RxqDescr *r = pv;
2481 int i;
2482
2483 for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
2484 vmxnet3_put_ring_to_file(f, &r->rx_ring[i]);
2485 }
2486
2487 vmxnet3_put_ring_to_file(f, &r->comp_ring);
2488 qemu_put_byte(f, r->intr_idx);
2489 qemu_put_be64(f, r->rx_stats_pa);
2490 vmxnet3_put_rx_stats_to_file(f, &r->rxq_stats);
2491 }
2492
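/*
 * After migration, re-create the TX/RX packet helpers, re-claim the MSI-X
 * vectors if MSI-X was in use, and re-validate queue and interrupt state.
 */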
2493 static int vmxnet3_post_load(void *opaque, int version_id)
2494 {
2495 VMXNET3State *s = opaque;
2496 PCIDevice *d = PCI_DEVICE(s);
2497
2498 vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr);
2499 vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
2500
2501 if (s->msix_used) {
2502 if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
2503 VMW_WRPRN("Failed to re-use MSI-X vectors");
2504 msix_uninit(d, &s->msix_bar, &s->msix_bar);
2505 s->msix_used = false;
2506 return -1;
2507 }
2508 }
2509
2510 vmxnet3_validate_queues(s);
2511 vmxnet3_validate_interrupts(s);
2512
2513 return 0;
2514 }
2515
2516 static const VMStateInfo rxq_descr_info = {
2517 .name = "rxq_descr",
2518 .get = vmxnet3_get_rxq_descr,
2519 .put = vmxnet3_put_rxq_descr
2520 };
2521
2522 static int vmxnet3_get_int_state(QEMUFile *f, void *pv, size_t size)
2523 {
2524 Vmxnet3IntState *r = pv;
2525
2526 r->is_masked = qemu_get_byte(f);
2527 r->is_pending = qemu_get_byte(f);
2528 r->is_asserted = qemu_get_byte(f);
2529
2530 return 0;
2531 }
2532
2533 static void vmxnet3_put_int_state(QEMUFile *f, void *pv, size_t size)
2534 {
2535 Vmxnet3IntState *r = pv;
2536
2537 qemu_put_byte(f, r->is_masked);
2538 qemu_put_byte(f, r->is_pending);
2539 qemu_put_byte(f, r->is_asserted);
2540 }
2541
2542 static const VMStateInfo int_state_info = {
2543 .name = "int_state",
2544 .get = vmxnet3_get_int_state,
2545 .put = vmxnet3_put_int_state
2546 };
2547
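/* Main migration descriptor; array fields use the custom VMStateInfo helpers above */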
2548 static const VMStateDescription vmstate_vmxnet3 = {
2549 .name = "vmxnet3",
2550 .version_id = 1,
2551 .minimum_version_id = 1,
2552 .pre_save = vmxnet3_pre_save,
2553 .post_load = vmxnet3_post_load,
2554 .fields = (VMStateField[]) {
2555 VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State),
2556 VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
2557 VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
2558 VMSTATE_BOOL(lro_supported, VMXNET3State),
2559 VMSTATE_UINT32(rx_mode, VMXNET3State),
2560 VMSTATE_UINT32(mcast_list_len, VMXNET3State),
2561 VMSTATE_UINT32(mcast_list_buff_size, VMXNET3State),
2562 VMSTATE_UINT32_ARRAY(vlan_table, VMXNET3State, VMXNET3_VFT_SIZE),
2563 VMSTATE_UINT32(mtu, VMXNET3State),
2564 VMSTATE_UINT16(max_rx_frags, VMXNET3State),
2565 VMSTATE_UINT32(max_tx_frags, VMXNET3State),
2566 VMSTATE_UINT8(event_int_idx, VMXNET3State),
2567 VMSTATE_BOOL(auto_int_masking, VMXNET3State),
2568 VMSTATE_UINT8(txq_num, VMXNET3State),
2569 VMSTATE_UINT8(rxq_num, VMXNET3State),
2570 VMSTATE_UINT32(device_active, VMXNET3State),
2571 VMSTATE_UINT32(last_command, VMXNET3State),
2572 VMSTATE_UINT32(link_status_and_speed, VMXNET3State),
2573 VMSTATE_UINT32(temp_mac, VMXNET3State),
2574 VMSTATE_UINT64(drv_shmem, VMXNET3State),
2575 VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),
2576
2577 VMSTATE_ARRAY(txq_descr, VMXNET3State,
2578 VMXNET3_DEVICE_MAX_TX_QUEUES, 0, txq_descr_info,
2579 Vmxnet3TxqDescr),
2580 VMSTATE_ARRAY(rxq_descr, VMXNET3State,
2581 VMXNET3_DEVICE_MAX_RX_QUEUES, 0, rxq_descr_info,
2582 Vmxnet3RxqDescr),
2583 VMSTATE_ARRAY(interrupt_states, VMXNET3State, VMXNET3_MAX_INTRS,
2584 0, int_state_info, Vmxnet3IntState),
2585
2586 VMSTATE_END_OF_LIST()
2587 },
2588 .subsections = (const VMStateDescription*[]) {
2589 &vmxstate_vmxnet3_mcast_list,
2590 NULL
2591 }
2592 };
2593
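/*
 * "x-old-msi-offsets" restores the previous MSI/MSI-X capability and PBA
 * offsets; presumably set by machine-type compat code so that migration
 * from older QEMU versions keeps a consistent PCI config layout.
 */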
2594 static Property vmxnet3_properties[] = {
2595 DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
2596 DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
2597 VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
2598 DEFINE_PROP_END_OF_LIST(),
2599 };
2600
2601 static void vmxnet3_class_init(ObjectClass *class, void *data)
2602 {
2603 DeviceClass *dc = DEVICE_CLASS(class);
2604 PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
2605
2606 c->realize = vmxnet3_pci_realize;
2607 c->exit = vmxnet3_pci_uninit;
2608 c->vendor_id = PCI_VENDOR_ID_VMWARE;
2609 c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2610 c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
2611 c->class_id = PCI_CLASS_NETWORK_ETHERNET;
2612 c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
2613 c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
2614 dc->desc = "VMWare Paravirtualized Ethernet v3";
2615 dc->reset = vmxnet3_qdev_reset;
2616 dc->vmsd = &vmstate_vmxnet3;
2617 dc->props = vmxnet3_properties;
2618 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2619 }
2620
2621 static const TypeInfo vmxnet3_info = {
2622 .name = TYPE_VMXNET3,
2623 .parent = TYPE_PCI_DEVICE,
2624 .instance_size = sizeof(VMXNET3State),
2625 .class_init = vmxnet3_class_init,
2626 .instance_init = vmxnet3_instance_init,
2627 };
2628
2629 static void vmxnet3_register_types(void)
2630 {
2631 VMW_CBPRN("vmxnet3_register_types called...");
2632 type_register_static(&vmxnet3_info);
2633 }
2634
2635 type_init(vmxnet3_register_types)