qemu.git blob ec8ecd7b94934532c8d77138cfcd41cf825f7618 -- hw/net/e1000.c
/*
 * QEMU e1000 emulation
 *
 * Software developer's manual:
 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
 *
 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
 * Copyright (c) 2008 Qumranet
 * Based on work done by:
 * Copyright (c) 2007 Dan Aloni
 * Copyright (c) 2004 Antony T Curtis
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */


#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "net/net.h"
#include "net/checksum.h"
#include "hw/loader.h"
#include "sysemu/sysemu.h"
#include "sysemu/dma.h"
#include "qemu/iov.h"

#include "e1000_regs.h"

#define E1000_DEBUG

#ifdef E1000_DEBUG
enum {
    DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
    DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
    DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET,
};
#define DBGBIT(x) (1<<DEBUG_##x)
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

#define MAXIMUM_ETHERNET_HDR_LEN (14+4)

/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows and Linux
 *  E1000_DEV_ID_82573L OK with Windows and Linux 2.6.22,
 *      appears to perform better than 82540EM, but breaks with Linux 2.6.18
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  Others never tested
 */
enum { E1000_DEVID = E1000_DEV_ID_82540EM };

/*
 * May need to specify additional MAC-to-PHY entries --
 * Intel's Windows driver refuses to initialize unless they match
 */
enum {
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?         0xcc2 :
                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 :
                   /* default to E1000_DEV_ID_82540EM */        0xc20
};
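/*
 * Editor's note, not part of the original source (an assumption based on
 * Intel's driver headers): together with PHY_ID1 = 0x141 from phy_reg_init
 * below, the default PHY_ID2 of 0xc20 forms the combined 32-bit PHY
 * identifier 0x01410c20, which Intel's e1000 headers associate with the
 * Marvell 88E1011 PHY paired with the 82540EM MAC -- this is the
 * MAC-to-PHY match the comment above refers to.
 */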

typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    uint32_t mac_reg[0x8000];
    uint16_t phy_reg[0x20];
    uint16_t eeprom_data[64];

    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    struct e1000_tx {
        unsigned char header[256];
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;
        unsigned char sum_needed;
        unsigned char vlan_needed;
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;
        char tse;
        int8_t ip;
        int8_t tcp;
        char cptse; // current packet tse bit
    } tx;

    struct {
        uint32_t val_in; // shifted in from guest driver
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
    uint32_t compat_flags;
} E1000State;

#define TYPE_E1000 "e1000"

#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000)

#define defreg(x) x = (E1000_##x>>2)
enum {
    defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
    defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
    defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
    defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH),
    defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT),
    defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
    defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
    defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL),
    defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
    defreg(RA), defreg(MTA), defreg(CRCERRS), defreg(VFTA),
    defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV),
    defreg(ITR),
};

static void
e1000_link_down(E1000State *s)
{
    s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
    s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
}

static void
e1000_link_up(E1000State *s)
{
    s->mac_reg[STATUS] |= E1000_STATUS_LU;
    s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
}

static void
set_phy_ctrl(E1000State *s, int index, uint16_t val)
{
    /*
     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
     * migrate during auto negotiation, after migration the link will be
     * down.
     */
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }
    if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
        e1000_link_down(s);
        s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
        DBGOUT(PHY, "Start link auto negotiation\n");
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }
}
207
208 static void
209 e1000_autoneg_timer(void *opaque)
210 {
211 E1000State *s = opaque;
212 if (!qemu_get_queue(s->nic)->link_down) {
213 e1000_link_up(s);
214 }
215 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
216 DBGOUT(PHY, "Auto negotiation is completed\n");
217 }
218
219 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
220 [PHY_CTRL] = set_phy_ctrl,
221 };
222
223 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
224
225 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
226 static const char phy_regcap[0x20] = {
227 [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
228 [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
229 [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW,
230 [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R,
231 [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
232 [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R
233 };
234
235 static const uint16_t phy_reg_init[] = {
236 [PHY_CTRL] = 0x1140,
237 [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
238 [PHY_ID1] = 0x141, [PHY_ID2] = PHY_ID2_INIT,
239 [PHY_1000T_CTRL] = 0x0e00, [M88E1000_PHY_SPEC_CTRL] = 0x360,
240 [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60, [PHY_AUTONEG_ADV] = 0xde1,
241 [PHY_LP_ABILITY] = 0x1e0, [PHY_1000T_STATUS] = 0x3c00,
242 [M88E1000_PHY_SPEC_STATUS] = 0xac00,
243 };
244
245 static const uint32_t mac_reg_init[] = {
246 [PBA] = 0x00100030,
247 [LEDCTL] = 0x602,
248 [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
249 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
250 [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
251 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
252 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
253 E1000_STATUS_LU,
254 [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
255 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
256 E1000_MANC_RMCP_EN,
257 };
258
259 /* Helper function, *curr == 0 means the value is not set */
260 static inline void
261 mit_update_delay(uint32_t *curr, uint32_t value)
262 {
263 if (value && (*curr == 0 || value < *curr)) {
264 *curr = value;
265 }
266 }
267
268 static void
269 set_interrupt_cause(E1000State *s, int index, uint32_t val)
270 {
271 PCIDevice *d = PCI_DEVICE(s);
272 uint32_t pending_ints;
273 uint32_t mit_delay;
274
275 if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
276 /* Only for 8257x */
277 val |= E1000_ICR_INT_ASSERTED;
278 }
279 s->mac_reg[ICR] = val;
280
281 /*
282 * Make sure ICR and ICS registers have the same value.
283 * The spec says that the ICS register is write-only. However in practice,
284 * on real hardware ICS is readable, and for reads it has the same value as
285 * ICR (except that ICS does not have the clear on read behaviour of ICR).
286 *
287 * The VxWorks PRO/1000 driver uses this behaviour.
288 */
289 s->mac_reg[ICS] = val;
290
291 pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
292 if (!s->mit_irq_level && pending_ints) {
293 /*
294 * Here we detect a potential raising edge. We postpone raising the
295 * interrupt line if we are inside the mitigation delay window
296 * (s->mit_timer_on == 1).
297 * We provide a partial implementation of interrupt mitigation,
298 * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
299 * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
300 * RADV; relative timers based on TIDV and RDTR are not implemented.
301 */
302 if (s->mit_timer_on) {
303 return;
304 }
305 if (s->compat_flags & E1000_FLAG_MIT) {
306 /* Compute the next mitigation delay according to pending
307 * interrupts and the current values of RADV (provided
308 * RDTR!=0), TADV and ITR.
309 * Then rearm the timer.
310 */
311 mit_delay = 0;
312 if (s->mit_ide &&
313 (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
314 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
315 }
316 if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
317 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
318 }
319 mit_update_delay(&mit_delay, s->mac_reg[ITR]);
320
321 if (mit_delay) {
322 s->mit_timer_on = 1;
323 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
324 mit_delay * 256);
325 }
326 s->mit_ide = 0;
327 }
328 }
329
330 s->mit_irq_level = (pending_ints != 0);
331 pci_set_irq(d, s->mit_irq_level);
332 }
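
/*
 * Worked example (editor's illustration, register values assumed): all
 * candidate delays above are expressed in 256ns units before the final
 * "mit_delay * 256" scaling; TADV and RADV are specified in 1024ns units,
 * hence the "* 4" conversion. Suppose a TXDW interrupt is pending with IDE
 * set, TADV = 16 and ITR = 200:
 *
 *     TADV candidate: 16 * 4 = 64 units  -> 64 * 256ns  = 16.384us
 *     ITR  candidate: 200 units          -> 200 * 256ns = 51.2us
 *
 * mit_update_delay() keeps the smallest non-zero candidate, so the timer
 * fires 16.384us after the rising edge and the IRQ is raised only then.
 */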

static void
e1000_mit_timer(void *opaque)
{
    E1000State *s = opaque;

    s->mit_timer_on = 0;
    /* Call set_interrupt_cause to update the irq level (if necessary). */
    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
}

static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
        s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}

static int
rxbufsize(uint32_t v)
{
    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
    switch (v) {
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
        return 16384;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
        return 8192;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
        return 4096;
    case E1000_RCTL_SZ_1024:
        return 1024;
    case E1000_RCTL_SZ_512:
        return 512;
    case E1000_RCTL_SZ_256:
        return 256;
    }
    return 2048;
}
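
/*
 * Editor's note (an assumption based on Intel's e1000 register layout, not
 * stated in this file): E1000_RCTL_SZ_2048 is the all-zeroes BSIZE encoding,
 * so the fall-through "return 2048" above covers both the reset default and
 * the explicit 2048-byte setting; with BSEX set, the same BSIZE field
 * selects the extended 4096/8192/16384 sizes handled in the switch.
 */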

static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
}
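
/*
 * Worked example (editor's illustration, MAC address assumed): for
 * macaddr = 52:54:00:12:34:56 the loop above packs the bytes little-endian
 * into the first receive-address pair:
 *
 *     RAL (mac_reg[RA])     = 0x12005452                 (bytes 0..3)
 *     RAH (mac_reg[RA + 1]) = E1000_RAH_AV | 0x5634      (bytes 4..5)
 *
 * which is exactly the RAL0/RAH0 layout that receive_filter() later
 * compares against incoming destination addresses.
 */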

static void
set_ctrl(E1000State *s, int index, uint32_t val)
{
    /* RST is self clearing */
    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
}

static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = rxbufsize(val);
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}

static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            }
            s->phy_reg[addr] = data;
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}

static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}

static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val))                 // CS inactive; nothing to do
        return;
    if (E1000_EECD_CS & (val ^ oldval)) {       // CS rise edge; reset state
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval)))      // no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {               // falling edge
        s->eecd_state.bitnum_out++;
        return;
    }
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
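
/*
 * Worked example (editor's illustration, assuming the standard Microwire
 * READ opcode 0b110 for EEPROM_READ_OPCODE_MICROWIRE): a read of EEPROM
 * word 2 clocks in 9 bits -- start bit 1, opcode 10, then the 6-bit address
 * 000010 -- so val_in ends up as 0b110000010. (val_in >> 6) & 7 == 0b110
 * matches the read opcode, and bitnum_out is set to (2 << 4) - 1 = 31,
 * i.e. one bit before word 2. Each subsequent falling SK edge advances
 * bitnum_out, and get_eecd() then serves eeprom_data[2] on DO
 * most-significant bit first (the "^ 0xf" selects bit 15 downwards).
 */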

static uint32_t
flash_eerd_read(E1000State *s, int x)
{
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;

    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
        return (s->mac_reg[EERD]);

    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
        return (E1000_EEPROM_RW_REG_DONE | r);

    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
           E1000_EEPROM_RW_REG_DONE | r);
}
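
/*
 * Editor's note on the EERD flow above (symbolic, field positions as
 * defined in e1000_regs.h): the guest writes START plus a word address
 * shifted by E1000_EEPROM_RW_ADDR_SHIFT, then polls EERD; this emulation
 * answers within the same read, returning DONE together with the 16-bit
 * EEPROM word shifted up by E1000_EEPROM_RW_REG_DATA. Addresses beyond
 * EEPROM_CHECKSUM_REG (the last valid word) complete with DONE but carry
 * no data.
 */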

static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        cpu_to_be16wu((uint16_t *)(data + sloc),
                      net_checksum_finish(sum));
    }
}

static inline int
vlan_enabled(E1000State *s)
{
    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
}

static inline int
vlan_rx_filter_enabled(E1000State *s)
{
    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
}

static inline int
is_vlan_packet(E1000State *s, const uint8_t *buf)
{
    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
                le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
}

static inline int
is_vlan_txd(uint32_t txd_lower)
{
    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
}

/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
 * fill it in, just pad descriptor length by 4 bytes unless guest
 * told us to strip it off the packet. */
static inline int
fcs_len(E1000State *s)
{
    return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
}

static void
e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
{
    NetClientState *nc = qemu_get_queue(s->nic);
    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
        nc->info->receive(nc, buf, size);
    } else {
        qemu_send_packet(nc, buf, size);
    }
}

static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) { // IPv4
            cpu_to_be16wu((uint16_t *)(tp->data+css+2),
                          tp->size - css);
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else // IPv6
            cpu_to_be16wu((uint16_t *)(tp->data+css+4),
                          tp->size - css);
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            sofar = frames * tp->mss;
            cpu_to_be32wu((uint32_t *)(tp->data+css+4), // seq
                be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9; // PSH, FIN
        } else // UDP
            cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            cpu_to_be16wu(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
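
/*
 * Worked example (editor's illustration, values assumed): the guest stores
 * a partial pseudo-header checksum at tucso that covers addresses and
 * protocol but a zero TCP length, so each emitted segment must fold its own
 * length in. If the stored partial sum is 0x1a2b and this segment carries
 * len = 0x05dc bytes from tucss onward, the code above computes
 * 0x1a2b + 0x05dc = 0x2007 (no carry to fold), stores it back, and putsum()
 * then finishes the one's-complement checksum over the segment.
 */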

static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) { // this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        cpu_to_be16wu((uint16_t *)(tp->vlan_header),
                      le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
                      le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
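
/*
 * Worked example (editor's illustration, values assumed): with TSO enabled,
 * hdr_len = 54 (Ethernet + IPv4 + TCP headers) and mss = 1460 give
 * msh = 1514. The loop above accumulates guest buffers until tp->size
 * reaches 1514, emits that segment via xmit_seg(), then re-seeds tp->data
 * with the saved 54-byte header and continues. A paylen of 2920 therefore
 * leaves the device as two 1514-byte wire frames.
 */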

static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}

static uint64_t tx_desc_base(E1000State *s)
{
    uint64_t bah = s->mac_reg[TDBAH];
    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;

    return (bah << 32) + bal;
}

static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}

static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE) // promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE)) // promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
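
/*
 * Worked example (editor's illustration): with multicast offset MO = 0 the
 * shift is mta_shift[0] = 4. For destination 01:00:5e:00:00:fb (the IPv4
 * multicast MAC for mDNS), buf[4] = 0x00 and buf[5] = 0xfb, so
 * f = (0xfb00 >> 4) & 0xfff = 0xfb0; the frame is accepted if bit 16
 * (f & 0x1f) of MTA word 125 (f >> 5) is set.
 */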

static void
e1000_set_link_status(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    uint32_t old_status = s->mac_reg[STATUS];

    if (nc->link_down) {
        e1000_link_down(s);
    } else {
        e1000_link_up(s);
    }

    if (s->mac_reg[STATUS] != old_status)
        set_ics(s, 0, E1000_ICR_LSC);
}

static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
               s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}

static int
e1000_can_receive(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
           (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
}

static uint64_t rx_desc_base(E1000State *s)
{
    uint64_t bah = s->mac_reg[RDBAH];
    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;

    return (bah << 32) + bal;
}

static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
                                                                + 14)));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
        set_ics(s, 0, E1000_ICS_RXO);
        return -1;
    }
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    s->mac_reg[GPRC]++;
    s->mac_reg[TPR]++;
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     */
    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
    if (n < s->mac_reg[TORL])
        s->mac_reg[TORH]++;
    s->mac_reg[TORL] = n;

    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
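
/*
 * Worked example (editor's illustration, values assumed): with
 * rxbuf_size = 2048 and FCS not stripped, a 3000-byte frame gives
 * total_size = 3004. The loop above consumes two descriptors: the first is
 * written back with length 2048, DD set and EOP cleared; the second takes
 * the remaining 956 bytes with DD, EOP and IXSM set, and RDH advances past
 * both.
 */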

static ssize_t
e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    const struct iovec iov = {
        .iov_base = (uint8_t *)buf,
        .iov_len = size
    };

    return e1000_receive_iov(nc, &iov, 1);
}

static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}

static uint32_t
mac_icr_read(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[ICR];

    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
    set_interrupt_cause(s, 0, 0);
    return ret;
}

static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}

static uint32_t
mac_read_clr8(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    s->mac_reg[index-1] = 0;
    return ret;
}

static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    uint32_t macaddr[2];

    s->mac_reg[index] = val;

    if (index == RA + 1) {
        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
    }
}

static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
    if (e1000_has_rxbufs(s, 1)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}

static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}

static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}

static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}

static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}

static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}

static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}

#define getreg(x) [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL),
    getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL),
    getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS),
    getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL),
    getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
    getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
    getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV),
    getreg(TADV), getreg(ITR),

    [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4,
    [ICR] = mac_icr_read, [EECD] = get_eecd, [EERD] = flash_eerd_read,
    [CRCERRS ... MPC] = &mac_readreg,
    [RA ... RA+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };

#define putreg(x) [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC),
    putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH),
    putreg(RDBAL), putreg(LEDCTL), putreg(VET),
    [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl,
    [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics,
    [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
    [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr,
    [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
    [ITR] = set_16bit,
    [RA ... RA+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };

static void
e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                 unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NWRITEOPS && macreg_writeops[index]) {
        macreg_writeops[index](s, index, val);
    } else if (index < NREADOPS && macreg_readops[index]) {
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
               index<<2, val);
    }
}

static uint64_t
e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NREADOPS && macreg_readops[index]) {
        return macreg_readops[index](s, index);
    }
    DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
    return 0;
}

static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

static uint64_t e1000_io_read(void *opaque, hwaddr addr,
                              unsigned size)
{
    E1000State *s = opaque;

    (void)s;
    return 0;
}

static void e1000_io_write(void *opaque, hwaddr addr,
                           uint64_t val, unsigned size)
{
    E1000State *s = opaque;

    (void)s;
}

static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};

static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}

static void e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* If the mitigation timer is active, emulate a timeout now. */
    if (s->mit_timer_on) {
        e1000_mit_timer(s);
    }

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }

    /*
     * If link is down and auto-negotiation is ongoing, complete
     * auto-negotiation immediately. This allows us to look at
     * MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down &&
        s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }
}

static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    if (!(s->compat_flags & E1000_FLAG_MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return 0;
    }

    if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    return 0;
}

static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return s->compat_flags & E1000_FLAG_MIT;
}

static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField []) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};

static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x0000, 0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048, 0x1000, 0x00d8, 0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b, 0x0984, 0x0000, 0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff, 0x4d01, 0xffff, 0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000,
};

/* PCI interface */

static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}

static void
e1000_cleanup(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    s->nic = NULL;
}

static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    memory_region_destroy(&d->mmio);
    memory_region_destroy(&d->io);
    qemu_del_nic(d->nic);
}

static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};

static int pci_e1000_init(PCIDevice *pci_dev)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint16_t checksum = 0;
    int i;
    uint8_t *macaddr;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    memmove(d->eeprom_data, e1000_eeprom_template,
            sizeof e1000_eeprom_template);
    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;
    for (i = 0; i < 3; i++)
        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
        checksum += d->eeprom_data[i];
    checksum = (uint16_t) EEPROM_SUM - checksum;
    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);

    return 0;
}
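
/*
 * Worked example (editor's illustration, MAC address assumed): words 0..2
 * of the EEPROM hold the MAC address packed low-byte-first per 16-bit word,
 * so 52:54:00:12:34:56 is stored as 0x5452, 0x1200, 0x5634. The checksum
 * word is then chosen so that the 16-bit sum of words 0..EEPROM_CHECKSUM_REG
 * equals EEPROM_SUM, which is what guest drivers verify before trusting the
 * EEPROM contents.
 */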

static void qdev_e1000_reset(DeviceState *dev)
{
    E1000State *d = E1000(dev);
    e1000_reset(d);
}

static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};

static void e1000_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->init = pci_e1000_init;
    k->exit = pci_e1000_uninit;
    k->romfile = "efi-e1000.rom";
    k->vendor_id = PCI_VENDOR_ID_INTEL;
    k->device_id = E1000_DEVID;
    k->revision = 0x03;
    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    dc->desc = "Intel Gigabit Ethernet";
    dc->reset = qdev_e1000_reset;
    dc->vmsd = &vmstate_e1000;
    dc->props = e1000_properties;
}

static const TypeInfo e1000_info = {
    .name          = TYPE_E1000,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .class_init    = e1000_class_init,
};

static void e1000_register_types(void)
{
    type_register_static(&e1000_info);
}

type_init(e1000_register_types)