/*
 * QEMU e1000 emulation -- hw/net/e1000.c
 * (gitweb navigation header from the original listing removed)
 */
1 /*
2 * QEMU e1000 emulation
3 *
4 * Software developer's manual:
5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6 *
7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8 * Copyright (c) 2008 Qumranet
9 * Based on work done by:
10 * Copyright (c) 2007 Dan Aloni
11 * Copyright (c) 2004 Antony T Curtis
12 *
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version.
17 *
18 * This library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25 */
26
27
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36
37 #include "e1000_regs.h"
38
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* Debug categories; each gets one bit in debugflags (see DBGBIT below). */
enum {
    DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
    DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
    DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET,
};
#define DBGBIT(x) (1<<DEBUG_##x)
/* Categories enabled by default; edit at build time to get more output. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print to stderr only when the category's bit is set in debugflags. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE 0x40
#define PNPMMIO_SIZE 0x20000
#define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

/* Ethernet header (14 bytes) plus one 802.1Q VLAN tag (4 bytes). */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)

/*
 * HW models:
 * E1000_DEV_ID_82540EM works with Windows and Linux
 * E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22,
 * appears to perform better than 82540EM, but breaks with Linux 2.6.18
 * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 * Others never tested
 */
enum { E1000_DEVID = E1000_DEV_ID_82540EM };

/*
 * May need to specify additional MAC-to-PHY entries --
 * Intel's Windows driver refuses to initialize unless they match
 */
enum {
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ? 0xcc2 :
    E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 :
    /* default to E1000_DEV_ID_82540EM */ 0xc20
};
89
/* Device state for one emulated e1000 NIC. */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;       /* register window (memory BAR) */
    MemoryRegion io;         /* register window (I/O BAR) */

    uint32_t mac_reg[0x8000];    /* MAC registers, indexed by offset >> 2 */
    uint16_t phy_reg[0x20];      /* PHY (MII) registers */
    uint16_t eeprom_data[64];    /* EEPROM contents, 16-bit words */

    uint32_t rxbuf_size;         /* per-RX-descriptor buffer size from RCTL */
    uint32_t rxbuf_min_shift;    /* RDMTS threshold as a shift of RDLEN */
    /* Transmit state accumulated across descriptors of one packet. */
    struct e1000_tx {
        unsigned char header[256];    /* saved TSO header for resegmentation */
        unsigned char vlan_header[4]; /* tag to insert when vlan_needed */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;                /* bytes accumulated in data[] */
        unsigned char sum_needed;     /* TXSM/IXSM bits from the descriptor */
        unsigned char vlan_needed;
        /* Offload context (from the most recent context descriptor): */
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;          /* segments emitted so far for this TSO */
        char tse;                     /* TSE bit from the context descriptor */
        int8_t ip;                    /* 1 = IPv4, 0 = IPv6 */
        int8_t tcp;                   /* 1 = TCP, 0 = UDP */
        char cptse; // current packet tse bit
    } tx;

    /* Microwire EEPROM bit-bang state driven via the EECD register. */
    struct {
        uint32_t val_in; // shifted in from guest driver
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer; /* Mitigation timer. */
    bool mit_timer_on; /* Mitigation timer is running. */
    bool mit_irq_level; /* Tracks interrupt pin level. */
    uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */

    /* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
    uint32_t compat_flags;
} E1000State;

#define TYPE_E1000 "e1000"

#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000)

/* Map a register's byte offset (E1000_x) to its mac_reg[] word index. */
#define defreg(x) x = (E1000_##x>>2)
enum {
    defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
    defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
    defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
    defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH),
    defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT),
    defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
    defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
    defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL),
    defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
    defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
    defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV),
    defreg(ITR),
};
174
175 static void
176 e1000_link_down(E1000State *s)
177 {
178 s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
179 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
180 }
181
182 static void
183 e1000_link_up(E1000State *s)
184 {
185 s->mac_reg[STATUS] |= E1000_STATUS_LU;
186 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
187 }
188
/*
 * Handle a guest write to the PHY control register (PHY_CTRL).
 * Restarting auto-negotiation takes the link down immediately and schedules
 * the autoneg timer to complete negotiation (and bring the link back up)
 * 500ms of guest-virtual time later.
 */
static void
set_phy_ctrl(E1000State *s, int index, uint16_t val)
{
    /*
     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
     * migrate during auto negotiation, after migration the link will be
     * down.
     */
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }
    if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
        e1000_link_down(s);
        /* Negotiation in progress: clear "complete" until the timer fires. */
        s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
        DBGOUT(PHY, "Start link auto negotiation\n");
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }
}
207
208 static void
209 e1000_autoneg_timer(void *opaque)
210 {
211 E1000State *s = opaque;
212 if (!qemu_get_queue(s->nic)->link_down) {
213 e1000_link_up(s);
214 }
215 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
216 DBGOUT(PHY, "Auto negotiation is completed\n");
217 }
218
/* Per-register PHY write hooks, indexed by PHY register number. */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };

/* Access capabilities for each PHY register (read/write/both). */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW,
    [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
    [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R
};

/* Reset values for the PHY registers. */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL] = 0x1140,
    [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
    [PHY_ID1] = 0x141, [PHY_ID2] = PHY_ID2_INIT,
    [PHY_1000T_CTRL] = 0x0e00, [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60, [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0, [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
};

/* Reset values for the MAC registers (zero unless listed here). */
static const uint32_t mac_reg_init[] = {
    [PBA] = 0x00100030,
    [LEDCTL] = 0x602,
    [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
    E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
    E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
    E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
    E1000_STATUS_LU,
    [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
    E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
    E1000_MANC_RMCP_EN,
};
258
/* Merge a candidate mitigation delay into *curr, keeping the smallest
 * non-zero value; *curr == 0 means "not set yet". */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero means "no delay requested" */
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;          /* adopt the tighter deadline */
    }
}
267
/*
 * Record the new interrupt cause bits in ICR/ICS and update the INTx pin,
 * applying the partial interrupt-mitigation emulation (ITR/RADV/TADV)
 * when the compat flag enables it.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
        /* Only for 8257x */
        val |= E1000_ICR_INT_ASSERTED;
    }
    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Interrupts actually deliverable = causes masked in by IMS. */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (s->compat_flags & E1000_FLAG_MIT) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                /* TADV is in 1024ns units; *4 converts to the ITR 256ns unit. */
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            if (mit_delay) {
                s->mit_timer_on = 1;
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    qemu_set_irq(d->irq[0], s->mit_irq_level);
}
333
334 static void
335 e1000_mit_timer(void *opaque)
336 {
337 E1000State *s = opaque;
338
339 s->mit_timer_on = 0;
340 /* Call set_interrupt_cause to update the irq level (if necessary). */
341 set_interrupt_cause(s, 0, s->mac_reg[ICR]);
342 }
343
344 static void
345 set_ics(E1000State *s, int index, uint32_t val)
346 {
347 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
348 s->mac_reg[IMS]);
349 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
350 }
351
352 static int
353 rxbufsize(uint32_t v)
354 {
355 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
356 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
357 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
358 switch (v) {
359 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
360 return 16384;
361 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
362 return 8192;
363 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
364 return 4096;
365 case E1000_RCTL_SZ_1024:
366 return 1024;
367 case E1000_RCTL_SZ_512:
368 return 512;
369 case E1000_RCTL_SZ_256:
370 return 256;
371 }
372 return 2048;
373 }
374
/*
 * Full device reset: cancel timers, restore PHY/MAC registers to their
 * power-on defaults, clear the TX state, and seed RAL/RAH with the
 * configured MAC address.
 */
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    /* Zero everything, then overlay the non-zero reset defaults. */
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
}
405
406 static void
407 set_ctrl(E1000State *s, int index, uint32_t val)
408 {
409 /* RST is self clearing */
410 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
411 }
412
413 static void
414 set_rx_control(E1000State *s, int index, uint32_t val)
415 {
416 s->mac_reg[RCTL] = val;
417 s->rxbuf_size = rxbufsize(val);
418 s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
419 DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
420 s->mac_reg[RCTL]);
421 qemu_flush_queued_packets(qemu_get_queue(s->nic));
422 }
423
/*
 * Guest write to MDIC: perform the requested MDIO read or write of a PHY
 * register and latch the result (with READY, and ERROR on bad accesses)
 * back into MDIC.  Only PHY address 1 is implemented.
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* Replace the data field of val with the PHY register value. */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* Run any per-register side-effect hook before storing. */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            }
            s->phy_reg[addr] = data;
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    /* Optionally signal MDI access completion via interrupt. */
    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
457
/*
 * Guest read of EECD: reflect the last written control bits and drive the
 * DO (data out) line with the current bit of the EEPROM word being read
 * (MSB first).  DO idles high when no read is in progress.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* bitnum_out >> 4 selects the 16-bit word; the low nibble (inverted)
     * selects the bit within it, so bits come out MSB first. */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
471
/*
 * Guest write to EECD: emulate the Microwire EEPROM bit-bang protocol.
 * The guest toggles SK (clock) while driving DI; we shift command bits in
 * on rising edges and advance the output bit counter on falling edges.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val))			// CS inactive; nothing to do
	return;
    if (E1000_EECD_CS & (val ^ oldval)) {	// CS rise edge; reset state
	s->eecd_state.val_in = 0;
	s->eecd_state.bitnum_in = 0;
	s->eecd_state.bitnum_out = 0;
	s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval)))	// no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {		// falling edge
        s->eecd_state.bitnum_out++;
        return;
    }
    /* Rising clock edge: shift DI into the command/address accumulator. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    /* After 9 bits we have start + 3-bit opcode + 6-bit address. */
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
505
506 static uint32_t
507 flash_eerd_read(E1000State *s, int x)
508 {
509 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
510
511 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
512 return (s->mac_reg[EERD]);
513
514 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
515 return (E1000_EEPROM_RW_REG_DONE | r);
516
517 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
518 E1000_EEPROM_RW_REG_DONE | r);
519 }
520
/*
 * Compute an Internet checksum over data[css..n-1] and store it
 * big-endian at data[sloc].  cse, when non-zero, is the inclusive index
 * of the last byte to include in the sum.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    /* Only write if the 2-byte checksum slot fits inside the region. */
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        cpu_to_be16wu((uint16_t *)(data + sloc),
                      net_checksum_finish(sum));
    }
}
534
535 static inline int
536 vlan_enabled(E1000State *s)
537 {
538 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
539 }
540
541 static inline int
542 vlan_rx_filter_enabled(E1000State *s)
543 {
544 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
545 }
546
547 static inline int
548 is_vlan_packet(E1000State *s, const uint8_t *buf)
549 {
550 return (be16_to_cpup((uint16_t *)(buf + 12)) ==
551 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
552 }
553
554 static inline int
555 is_vlan_txd(uint32_t txd_lower)
556 {
557 return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
558 }
559
560 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
561 * fill it in, just pad descriptor length by 4 bytes unless guest
562 * told us to strip it off the packet. */
563 static inline int
564 fcs_len(E1000State *s)
565 {
566 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
567 }
568
569 static void
570 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
571 {
572 NetClientState *nc = qemu_get_queue(s->nic);
573 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
574 nc->info->receive(nc, buf, size);
575 } else {
576 qemu_send_packet(nc, buf, size);
577 }
578 }
579
/*
 * Transmit one segment from tx.data: patch IP/TCP/UDP headers for TSO,
 * apply requested checksum offloads, optionally insert the VLAN tag,
 * send the frame, and bump the TX statistics counters.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {		// IPv4
            /* Fix up total length and bump the IP ID per segment. */
            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
                          tp->size - css);
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else			// IPv6
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          tp->size - css);
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* Advance the sequence number by the payload already sent. */
            sofar = frames * tp->mss;
            cpu_to_be32wu((uint32_t *)(tp->data+css+4),	// seq
                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
            /* PSH/FIN belong only on the last segment. */
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;		// PSH, FIN
        } else	// UDP
            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            cpu_to_be16wu(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /* Insert the 4-byte tag after the MAC addresses; tp->vlan sits
         * immediately before tp->data, so the frame stays contiguous. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    /* 64-bit TOTL/TOTH byte counter with manual carry. */
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
638
/*
 * Process one TX descriptor: a context descriptor updates the offload
 * state; data/legacy descriptors accumulate payload in tx.data and, at
 * EOP (or each full MSS during TSO), hand a segment to xmit_seg().
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
        /* Latch checksum/TSO offload parameters for later descriptors. */
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {	// this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    /* Build the VLAN tag on the first (or only) descriptor of a packet. */
    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        cpu_to_be16wu((uint16_t *)(tp->vlan_header),
                      le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        /* TSO: accumulate up to header+MSS, emit a segment, then re-seed
         * the buffer with the saved header for the next segment. */
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                /* Header just became complete: save it for resegmenting. */
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        /* Non-TSO: just append this descriptor's buffer. */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* End of packet: send unless TSO is still short of a full header. */
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
734
/*
 * Write back the DD (descriptor done) status to a processed TX descriptor
 * when the guest asked for report status (RS/RPS).  Returns the interrupt
 * cause to accumulate (TXDW) or 0.
 */
static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    /* Set DD; clear error bits we never report. */
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    /* DMA only the status dword back into guest memory. */
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}
750
751 static uint64_t tx_desc_base(E1000State *s)
752 {
753 uint64_t bah = s->mac_reg[TDBAH];
754 uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
755
756 return (bah << 32) + bal;
757 }
758
/*
 * Drain the TX descriptor ring: process descriptors from TDH up to TDT,
 * write back completion status, and raise the accumulated TX interrupts.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* Advance TDH, wrapping at the end of the ring. */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
799
/*
 * Decide whether an incoming frame should be accepted, applying (in
 * order): the VLAN filter table, promiscuous modes, broadcast accept,
 * exact unicast matches (RA array), and the multicast hash table (MTA).
 * Returns non-zero to accept the frame.
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        /* VID indexes a bit in the 128-word VLAN filter table. */
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE)			// promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))	// promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    /* Exact match against each valid (AV-flagged) receive address pair. */
    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    /* Multicast hash: RCTL.MO selects which 12 address bits index MTA. */
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
852
853 static void
854 e1000_set_link_status(NetClientState *nc)
855 {
856 E1000State *s = qemu_get_nic_opaque(nc);
857 uint32_t old_status = s->mac_reg[STATUS];
858
859 if (nc->link_down) {
860 e1000_link_down(s);
861 } else {
862 e1000_link_up(s);
863 }
864
865 if (s->mac_reg[STATUS] != old_status)
866 set_ics(s, 0, E1000_ICR_LSC);
867 }
868
/* Return true when the RX ring has enough free descriptors (each holding
 * rxbuf_size bytes) between RDH and RDT to store total_size bytes. */
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        /* RDT has wrapped around below RDH. */
        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
            s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        /* RDH == RDT: ring is empty. */
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}
886
887 static int
888 e1000_can_receive(NetClientState *nc)
889 {
890 E1000State *s = qemu_get_nic_opaque(nc);
891
892 return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
893 (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
894 }
895
896 static uint64_t rx_desc_base(E1000State *s)
897 {
898 uint64_t bah = s->mac_reg[RDBAH];
899 uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
900
901 return (bah << 32) + bal;
902 }
903
/*
 * Receive a packet described by a scatter-gather iovec: filter it, strip
 * the VLAN tag if configured, DMA the payload into the guest's RX
 * descriptor ring, update counters and raise RX interrupts.
 * Returns bytes consumed, or -1 to have the backend queue the packet for
 * retry (link down, RX disabled, or no descriptors available).
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
        /* Strip the tag: record the VID for the descriptor's "special"
         * field and shift the MAC addresses over the 4-byte tag. */
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
                                                                + 14)));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
    }
    /* Spread the packet across as many descriptors as needed. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* Walk the iovec, DMAing chunks into the guest buffer. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    s->mac_reg[GPRC]++;
    s->mac_reg[TPR]++;
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     */
    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
    if (n < s->mac_reg[TORL])
        s->mac_reg[TORH]++;
    s->mac_reg[TORL] = n;

    /* Raise receiver-timer; add "descriptor minimum threshold reached"
     * when free descriptors drop below the RDMTS fraction of the ring. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1058
1059 static ssize_t
1060 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1061 {
1062 const struct iovec iov = {
1063 .iov_base = (uint8_t *)buf,
1064 .iov_len = size
1065 };
1066
1067 return e1000_receive_iov(nc, &iov, 1);
1068 }
1069
1070 static uint32_t
1071 mac_readreg(E1000State *s, int index)
1072 {
1073 return s->mac_reg[index];
1074 }
1075
1076 static uint32_t
1077 mac_icr_read(E1000State *s, int index)
1078 {
1079 uint32_t ret = s->mac_reg[ICR];
1080
1081 DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1082 set_interrupt_cause(s, 0, 0);
1083 return ret;
1084 }
1085
1086 static uint32_t
1087 mac_read_clr4(E1000State *s, int index)
1088 {
1089 uint32_t ret = s->mac_reg[index];
1090
1091 s->mac_reg[index] = 0;
1092 return ret;
1093 }
1094
1095 static uint32_t
1096 mac_read_clr8(E1000State *s, int index)
1097 {
1098 uint32_t ret = s->mac_reg[index];
1099
1100 s->mac_reg[index] = 0;
1101 s->mac_reg[index-1] = 0;
1102 return ret;
1103 }
1104
1105 static void
1106 mac_writereg(E1000State *s, int index, uint32_t val)
1107 {
1108 s->mac_reg[index] = val;
1109 }
1110
1111 static void
1112 set_rdt(E1000State *s, int index, uint32_t val)
1113 {
1114 s->mac_reg[index] = val & 0xffff;
1115 if (e1000_has_rxbufs(s, 1)) {
1116 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1117 }
1118 }
1119
1120 static void
1121 set_16bit(E1000State *s, int index, uint32_t val)
1122 {
1123 s->mac_reg[index] = val & 0xffff;
1124 }
1125
1126 static void
1127 set_dlen(E1000State *s, int index, uint32_t val)
1128 {
1129 s->mac_reg[index] = val & 0xfff80;
1130 }
1131
1132 static void
1133 set_tctl(E1000State *s, int index, uint32_t val)
1134 {
1135 s->mac_reg[index] = val;
1136 s->mac_reg[TDT] &= 0xffff;
1137 start_xmit(s);
1138 }
1139
1140 static void
1141 set_icr(E1000State *s, int index, uint32_t val)
1142 {
1143 DBGOUT(INTERRUPT, "set_icr %x\n", val);
1144 set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1145 }
1146
1147 static void
1148 set_imc(E1000State *s, int index, uint32_t val)
1149 {
1150 s->mac_reg[IMS] &= ~val;
1151 set_ics(s, 0, 0);
1152 }
1153
1154 static void
1155 set_ims(E1000State *s, int index, uint32_t val)
1156 {
1157 s->mac_reg[IMS] |= val;
1158 set_ics(s, 0, 0);
1159 }
1160
#define getreg(x) [x] = mac_readreg
/* Read dispatch table, indexed by register offset >> 2.  Slots left
 * NULL fall through to the "unknown" path in e1000_mmio_read().
 * Read-to-clear statistics registers get mac_read_clr4/8; ICR gets its
 * clearing handler. */
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL),
    getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL),
    getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS),
    getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL),
    getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
    getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
    getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV),
    getreg(TADV), getreg(ITR),

    [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4,
    [ICR] = mac_icr_read, [EECD] = get_eecd, [EERD] = flash_eerd_read,
    /* Statistics block and the RA/MTA/VFTA filter arrays read back
     * whatever was last stored. */
    [CRCERRS ... MPC] = &mac_readreg,
    [RA ... RA+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1181
#define putreg(x) [x] = mac_writereg
/* Write dispatch table, indexed by register offset >> 2.  Registers
 * with side effects (doorbells, interrupt registers, ring geometry)
 * get dedicated handlers; the rest are plain stores.  NULL slots are
 * treated as read-only or unknown in e1000_mmio_write(). */
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC),
    putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH),
    putreg(RDBAL), putreg(LEDCTL), putreg(VET),
    [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl,
    [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics,
    [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
    [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr,
    [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
    [ITR] = set_16bit,
    [RA ... RA+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1200
1201 static void
1202 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1203 unsigned size)
1204 {
1205 E1000State *s = opaque;
1206 unsigned int index = (addr & 0x1ffff) >> 2;
1207
1208 if (index < NWRITEOPS && macreg_writeops[index]) {
1209 macreg_writeops[index](s, index, val);
1210 } else if (index < NREADOPS && macreg_readops[index]) {
1211 DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1212 } else {
1213 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1214 index<<2, val);
1215 }
1216 }
1217
1218 static uint64_t
1219 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1220 {
1221 E1000State *s = opaque;
1222 unsigned int index = (addr & 0x1ffff) >> 2;
1223
1224 if (index < NREADOPS && macreg_readops[index])
1225 {
1226 return macreg_readops[index](s, index);
1227 }
1228 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1229 return 0;
1230 }
1231
/* MMIO (BAR 0) access ops.  .impl restricts the implementation to
 * 4-byte accesses; the memory core adapts other guest access sizes to
 * that. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1241
1242 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1243 unsigned size)
1244 {
1245 E1000State *s = opaque;
1246
1247 (void)s;
1248 return 0;
1249 }
1250
1251 static void e1000_io_write(void *opaque, hwaddr addr,
1252 uint64_t val, unsigned size)
1253 {
1254 E1000State *s = opaque;
1255
1256 (void)s;
1257 }
1258
/* PIO (BAR 1) access ops: reads return 0, writes are ignored (see
 * e1000_io_read/e1000_io_write). */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1264
static bool is_version_1(void *opaque, int version_id)
{
    /* VMSTATE_UNUSED_TEST predicate: the skipped field exists only in
       version-1 migration streams. */
    return (version_id == 1) ? true : false;
}
1269
1270 static void e1000_pre_save(void *opaque)
1271 {
1272 E1000State *s = opaque;
1273 NetClientState *nc = qemu_get_queue(s->nic);
1274
1275 /* If the mitigation timer is active, emulate a timeout now. */
1276 if (s->mit_timer_on) {
1277 e1000_mit_timer(s);
1278 }
1279
1280 if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
1281 return;
1282 }
1283
1284 /*
1285 * If link is down and auto-negotiation is ongoing, complete
1286 * auto-negotiation immediately. This allows is to look at
1287 * MII_SR_AUTONEG_COMPLETE to infer link status on load.
1288 */
1289 if (nc->link_down &&
1290 s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
1291 s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
1292 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1293 }
1294 }
1295
/* Migration post_load hook: reconstruct runtime state that is not
 * carried in the stream.  Always returns 0. */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* With mitigation disabled on this machine type, discard any
     * mitigation register values the source may have sent. */
    if (!(s->compat_flags & E1000_FLAG_MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    /* pre_save flushed the mitigation timer, so start idle. */
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return 0;
    }

    /* Auto-negotiation was restarted but had not completed on the
     * source (pre_save would have set MII_SR_AUTONEG_COMPLETE
     * otherwise): re-arm the autoneg timer to finish it here. */
    if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    return 0;
}
1327
1328 static bool e1000_mit_state_needed(void *opaque)
1329 {
1330 E1000State *s = opaque;
1331
1332 return s->compat_flags & E1000_FLAG_MIT;
1333 }
1334
/* Migration subsection carrying interrupt-mitigation registers and the
 * current IRQ level; gated by e1000_mit_state_needed(). */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1349
/* Main migration description.  Field order and types define the wire
 * format: do not reorder or retype entries; extend only via new
 * versions or subsections. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField []) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base. */
        /* RX buffer geometry derived from RCTL. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM bit-bang engine state. */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* In-flight TX context (checksum offload / TSO). */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individual MAC registers, alphabetical. */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Receive-address and multicast/VLAN filter tables. */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};
1437
/* Default EEPROM image, 64 16-bit words.  Words 0-2 are overwritten
 * with the configured MAC address and word EEPROM_CHECKSUM_REG is
 * recomputed in pci_e1000_init(); words 11 and 13 carry the PCI device
 * ID, 12 and 14 the Intel vendor ID (0x8086).  The remaining values
 * presumably mirror a real 8254x part — consult the 8254x EEPROM map
 * before changing them. */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1448
1449 /* PCI interface */
1450
/* Create the MMIO and PIO regions.  Most of the MMIO BAR is marked
 * coalescible to batch guest accesses; 4-byte holes are punched at the
 * registers whose reads/writes have side effects (interrupt registers
 * and the TCTL/TDT transmit doorbells, see macreg_read/writeops). */
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    /* Ascending list of excluded register offsets, terminated by
     * PNPMMIO_SIZE which also bounds the final coalesced range. */
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    /* Coalesce [0, first excluded register)... */
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    /* ...then each gap between consecutive excluded registers,
     * skipping the excluded 4-byte register itself. */
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}
1468
1469 static void
1470 e1000_cleanup(NetClientState *nc)
1471 {
1472 E1000State *s = qemu_get_nic_opaque(nc);
1473
1474 s->nic = NULL;
1475 }
1476
/* PCI unrealize hook: release everything pci_e1000_init() created. */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    /* Stop both timers before freeing them so no callback can fire
     * into a half-torn-down device. */
    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    memory_region_destroy(&d->mmio);
    memory_region_destroy(&d->io);
    qemu_del_nic(d->nic);
}
1490
/* Callbacks wiring this device into QEMU's net layer; both the linear
 * and scatter/gather receive paths are provided. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};
1500
1501 static int pci_e1000_init(PCIDevice *pci_dev)
1502 {
1503 DeviceState *dev = DEVICE(pci_dev);
1504 E1000State *d = E1000(pci_dev);
1505 uint8_t *pci_conf;
1506 uint16_t checksum = 0;
1507 int i;
1508 uint8_t *macaddr;
1509
1510 pci_conf = pci_dev->config;
1511
1512 /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1513 pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1514
1515 pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1516
1517 e1000_mmio_setup(d);
1518
1519 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1520
1521 pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1522
1523 memmove(d->eeprom_data, e1000_eeprom_template,
1524 sizeof e1000_eeprom_template);
1525 qemu_macaddr_default_if_unset(&d->conf.macaddr);
1526 macaddr = d->conf.macaddr.a;
1527 for (i = 0; i < 3; i++)
1528 d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1529 for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1530 checksum += d->eeprom_data[i];
1531 checksum = (uint16_t) EEPROM_SUM - checksum;
1532 d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1533
1534 d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1535 object_get_typename(OBJECT(d)), dev->id, d);
1536
1537 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1538
1539 add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");
1540
1541 d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1542 d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1543
1544 return 0;
1545 }
1546
1547 static void qdev_e1000_reset(DeviceState *dev)
1548 {
1549 E1000State *d = E1000(dev);
1550 e1000_reset(d);
1551 }
1552
/* User-settable properties; the two compat bits gate auto-negotiation
 * and interrupt-mitigation emulation for older machine types. */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1561
1562 static void e1000_class_init(ObjectClass *klass, void *data)
1563 {
1564 DeviceClass *dc = DEVICE_CLASS(klass);
1565 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1566
1567 k->init = pci_e1000_init;
1568 k->exit = pci_e1000_uninit;
1569 k->romfile = "efi-e1000.rom";
1570 k->vendor_id = PCI_VENDOR_ID_INTEL;
1571 k->device_id = E1000_DEVID;
1572 k->revision = 0x03;
1573 k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1574 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1575 dc->desc = "Intel Gigabit Ethernet";
1576 dc->reset = qdev_e1000_reset;
1577 dc->vmsd = &vmstate_e1000;
1578 dc->props = e1000_properties;
1579 }
1580
/* QOM type registration record for the e1000 PCI device. */
static const TypeInfo e1000_info = {
    .name = TYPE_E1000,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .class_init = e1000_class_init,
};
1587
/* Register the e1000 QOM type at module-init time. */
static void e1000_register_types(void)
{
    type_register_static(&e1000_info);
}

type_init(e1000_register_types)