/*
 * QEMU e1000 emulation
 *
 * Software developer's manual:
 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
 *
 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
 * Copyright (c) 2008 Qumranet
 * Based on work done by:
 * Copyright (c) 2007 Dan Aloni
 * Copyright (c) 2004 Antony T Curtis
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */


#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "net/net.h"
#include "net/checksum.h"
#include "hw/loader.h"
#include "sysemu/sysemu.h"
#include "sysemu/dma.h"
#include "qemu/iov.h"

#include "e1000_regs.h"

#define E1000_DEBUG

#ifdef E1000_DEBUG
enum {
    DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
    DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
    DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET,
};
#define DBGBIT(x) (1<<DEBUG_##x)
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

#define MAXIMUM_ETHERNET_HDR_LEN (14+4)

/*
 * HW models:
 *  E1000_DEV_ID_82540EM works with Windows and Linux
 *  E1000_DEV_ID_82573L OK with Windows and Linux 2.6.22,
 *      appears to perform better than 82540EM, but breaks with Linux 2.6.18
 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
 *  Others never tested
 */
enum { E1000_DEVID = E1000_DEV_ID_82540EM };

/*
 * May need to specify additional MAC-to-PHY entries --
 * Intel's Windows driver refuses to initialize unless they match
 */
enum {
    PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ?         0xcc2 :
                   E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 :
                   /* default to E1000_DEV_ID_82540EM */        0xc20
};

typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;
    MemoryRegion io;

    uint32_t mac_reg[0x8000];
    uint16_t phy_reg[0x20];
    uint16_t eeprom_data[64];

    uint32_t rxbuf_size;
    uint32_t rxbuf_min_shift;
    struct e1000_tx {
        unsigned char header[256];
        unsigned char vlan_header[4];
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;
        unsigned char sum_needed;
        unsigned char vlan_needed;
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;
        char tse;
        int8_t ip;
        int8_t tcp;
        char cptse;     // current packet tse bit
    } tx;

    struct {
        uint32_t val_in;    // shifted in from guest driver
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
    uint32_t compat_flags;
} E1000State;

#define TYPE_E1000 "e1000"

#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000)

#define defreg(x)   x = (E1000_##x>>2)
enum {
    defreg(CTRL),   defreg(EECD),   defreg(EERD),   defreg(GPRC),
    defreg(GPTC),   defreg(ICR),    defreg(ICS),    defreg(IMC),
    defreg(IMS),    defreg(LEDCTL), defreg(MANC),   defreg(MDIC),
    defreg(MPC),    defreg(PBA),    defreg(RCTL),   defreg(RDBAH),
    defreg(RDBAL),  defreg(RDH),    defreg(RDLEN),  defreg(RDT),
    defreg(STATUS), defreg(SWSM),   defreg(TCTL),   defreg(TDBAH),
    defreg(TDBAL),  defreg(TDH),    defreg(TDLEN),  defreg(TDT),
    defreg(TORH),   defreg(TORL),   defreg(TOTH),   defreg(TOTL),
    defreg(TPR),    defreg(TPT),    defreg(TXDCTL), defreg(WUFC),
    defreg(RA),     defreg(MTA),    defreg(CRCERRS), defreg(VFTA),
    defreg(VET),    defreg(RDTR),   defreg(RADV),   defreg(TADV),
    defreg(ITR),
};

static void
e1000_link_down(E1000State *s)
{
    s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
    s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
}

static void
e1000_link_up(E1000State *s)
{
    s->mac_reg[STATUS] |= E1000_STATUS_LU;
    s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
}

static void
set_phy_ctrl(E1000State *s, int index, uint16_t val)
{
    /*
     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
     * migrate during auto negotiation, after migration the link will be
     * down.
     */
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }
    if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
        e1000_link_down(s);
        s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
        DBGOUT(PHY, "Start link auto negotiation\n");
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }
}

static void
e1000_autoneg_timer(void *opaque)
{
    E1000State *s = opaque;
    if (!qemu_get_queue(s->nic)->link_down) {
        e1000_link_up(s);
    }
    s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    DBGOUT(PHY, "Auto negotiation is completed\n");
}

static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };

enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS] = PHY_R,       [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1] = PHY_R,          [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_CTRL] = PHY_RW,        [PHY_1000T_CTRL] = PHY_RW,
    [PHY_LP_ABILITY] = PHY_R,   [PHY_1000T_STATUS] = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
    [PHY_ID2] = PHY_R,          [M88E1000_PHY_SPEC_STATUS] = PHY_R
};

static const uint16_t phy_reg_init[] = {
    [PHY_CTRL] = 0x1140,
    [PHY_STATUS] = 0x794d, /* link initially up, auto-negotiation not yet completed */
    [PHY_ID1] = 0x141,   [PHY_ID2] = PHY_ID2_INIT,
    [PHY_1000T_CTRL] = 0x0e00,  [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,  [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,   [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
};

static const uint32_t mac_reg_init[] = {
    [PBA] =     0x00100030,
    [LEDCTL] =  0x602,
    [CTRL] =    E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS] =  0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC] =    E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};

/* Helper function, *curr == 0 means the value is not set */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value && (*curr == 0 || value < *curr)) {
        *curr = value;
    }
}

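/*
 * Interrupt mitigation overview: set_interrupt_cause() below postpones
 * raising the interrupt line by the smallest non-zero delay among ITR,
 * RADV and TADV.  Illustrative arithmetic, using the units described in
 * the comment inside set_interrupt_cause(): ITR = 500 gives
 * 500 * 256 ns = 128 us, while RADV = 64 gives 64 * 4 * 256 ns =
 * 64 * 1024 ns ~= 65.5 us, so in that case the RADV-derived delay is
 * the one used to arm the mitigation timer.
 */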
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
        /* Only for 8257x */
        val |= E1000_ICR_INT_ASSERTED;
    }
    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential rising edge.  We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR).  RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (s->compat_flags & E1000_FLAG_MIT) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            if (mit_delay) {
                s->mit_timer_on = 1;
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}

static void
e1000_mit_timer(void *opaque)
{
    E1000State *s = opaque;

    s->mit_timer_on = 0;
    /* Call set_interrupt_cause to update the irq level (if necessary). */
    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
}

static void
set_ics(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
        s->mac_reg[IMS]);
    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
}

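/*
 * Map the RCTL buffer-size bits (BSIZE, plus the BSEX size-extension
 * bit) to a byte count.  Any encoding not matched below, including the
 * power-on default, is treated as 2048 bytes.
 */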
static int
rxbufsize(uint32_t v)
{
    v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
    switch (v) {
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
        return 16384;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
        return 8192;
    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
        return 4096;
    case E1000_RCTL_SZ_1024:
        return 1024;
    case E1000_RCTL_SZ_512:
        return 512;
    case E1000_RCTL_SZ_256:
        return 256;
    }
    return 2048;
}

static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
}

static void
set_ctrl(E1000State *s, int index, uint32_t val)
{
    /* RST is self clearing */
    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
}

static void
set_rx_control(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[RCTL] = val;
    s->rxbuf_size = rxbufsize(val);
    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
           s->mac_reg[RCTL]);
    qemu_flush_queued_packets(qemu_get_queue(s->nic));
}

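/*
 * MDIC is the MDIO control register: the guest encodes a PHY address,
 * a PHY register number, an opcode and (for writes) the data into one
 * 32-bit write.  The model completes the transaction immediately,
 * setting READY (plus ERROR for a PHY address other than 1 or an
 * access the register does not permit), and optionally raises the
 * MDAC interrupt cause.
 */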
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            }
            s->phy_reg[addr] = data;
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}

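/*
 * EECD bit-bangs a Microwire serial EEPROM: the guest drives CS and DI
 * and toggles SK (the clock).  On rising SK edges the model shifts a
 * 9-bit command in (start bit, 2-bit opcode, 6-bit word address); once
 * a READ opcode is recognized, subsequent clock cycles shift the
 * addressed 16-bit words of eeprom_data out on DO, most significant
 * bit first (bitnum_out indexes the bit stream).
 */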
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}

static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val))             // CS inactive; nothing to do
        return;
    if (E1000_EECD_CS & (val ^ oldval)) {   // CS rise edge; reset state
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval)))  // no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {           // falling edge
        s->eecd_state.bitnum_out++;
        return;
    }
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}

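/*
 * EERD is the simpler, register-based EEPROM read path: the guest
 * writes a word address together with the START bit, and a read of the
 * register returns the addressed word with the DONE bit set.  Reads
 * past the checksum word report DONE with no data.
 */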
static uint32_t
flash_eerd_read(E1000State *s, int x)
{
    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;

    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
        return (s->mac_reg[EERD]);

    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
        return (E1000_EEPROM_RW_REG_DONE | r);

    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
           E1000_EEPROM_RW_REG_DONE | r);
}

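/*
 * Compute an Internet checksum over data[css..cse] (cse == 0 means "to
 * the end of the packet") and store it big-endian at offset sloc, as
 * directed by the checksum fields of a transmit context descriptor.
 */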
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    if (sloc < n-1) {
        sum = net_checksum_add(n-css, data+css);
        stw_be_p(data + sloc, net_checksum_finish(sum));
    }
}

static inline int
vlan_enabled(E1000State *s)
{
    return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
}

static inline int
vlan_rx_filter_enabled(E1000State *s)
{
    return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
}

static inline int
is_vlan_packet(E1000State *s, const uint8_t *buf)
{
    return (be16_to_cpup((uint16_t *)(buf + 12)) ==
                le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
}

static inline int
is_vlan_txd(uint32_t txd_lower)
{
    return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
}

/* FCS aka Ethernet CRC-32.  We don't get it from backends and can't
 * fill it in, just pad descriptor length by 4 bytes unless guest
 * told us to strip it off the packet. */
static inline int
fcs_len(E1000State *s)
{
    return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
}

static void
e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
{
    NetClientState *nc = qemu_get_queue(s->nic);
    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
        nc->info->receive(nc, buf, size);
    } else {
        qemu_send_packet(nc, buf, size);
    }
}

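/*
 * Emit one segment of a (possibly TSO) transmission.  For TSO segments
 * the headers are patched the way real segmentation-offload hardware
 * would do it: rewrite the IP total length (or IPv6 payload length),
 * advance the IPv4 identification field by the segment count, advance
 * the TCP sequence number by frames * mss, clear PSH and FIN on every
 * segment but the last, and fold the payload length into the TCP/UDP
 * pseudo-header checksum seed before the real checksum is inserted.
 */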
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {       // IPv4
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else          // IPv6
            stw_be_p(tp->data+css+4, tp->size - css);
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            sofar = frames * tp->mss;
            stl_be_p(tp->data+css+4,    // seq
                     ldl_be_p(tp->data+css+4)+sofar);
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;    // PSH, FIN
        } else  // UDP
            stw_be_p(tp->data+css+4, len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}

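/*
 * Consume one transmit descriptor.  Three flavors reach this point: a
 * context descriptor (DEXT set, DTYP data bit clear) that only latches
 * checksum/TSO parameters into s->tx; an extended data descriptor
 * (DEXT and DTYP_D set); and a legacy descriptor.  Payload bytes are
 * gathered into tp->data and flushed via xmit_seg() either once per
 * TSO segment (every hdr_len + mss bytes) or when EOP is seen.
 */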
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {  // context descriptor
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {   // this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        stw_be_p(tp->vlan_header + 2,
                 le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}

static uint32_t
txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);

    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
        return 0;
    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    dp->upper.data = cpu_to_le32(txd_upper);
    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
                  &dp->upper, sizeof(dp->upper));
    return E1000_ICR_TXDW;
}

static uint64_t tx_desc_base(E1000State *s)
{
    uint64_t bah = s->mac_reg[TDBAH];
    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;

    return (bah << 32) + bal;
}

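/*
 * Drain the transmit ring: descriptors between TDH (head, owned by the
 * device) and TDT (tail, advanced by the guest) are fetched, processed
 * and written back, with TDH wrapping at TDLEN.  Interrupt causes are
 * accumulated and delivered once at the end.
 */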
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}

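/*
 * Decide whether an incoming frame is accepted.  The checks mirror the
 * hardware filter order: VLAN frames are looked up in VFTA when VLAN
 * filtering is on; then unicast/multicast promiscuous modes and
 * broadcast acceptance; then the 16 RAH/RAL exact-match entries; and
 * finally the 4096-bit multicast table, indexed by 12 bits of the
 * destination address selected by RCTL.MO.  Illustrative example: with
 * MO == 0 (shift 4) and a destination address ending in ab:cd, the
 * index is (((0xcd << 8) | 0xab) >> 4) & 0xfff = 0xcda, i.e. bit 26 of
 * MTA word 0x66.
 */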
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE)      // promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))    // promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: "
           "%02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}

static void
e1000_set_link_status(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    uint32_t old_status = s->mac_reg[STATUS];

    if (nc->link_down) {
        e1000_link_down(s);
    } else {
        e1000_link_up(s);
    }

    if (s->mac_reg[STATUS] != old_status)
        set_ics(s, 0, E1000_ICR_LSC);
}

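/*
 * Check whether the receive ring has room for total_size bytes: count
 * the descriptors between RDH and RDT (RDH == RDT means the guest has
 * provided no free buffers) and multiply by the per-descriptor buffer
 * size.
 */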
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
            s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}

static int
e1000_can_receive(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
        (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
}

static uint64_t rx_desc_base(E1000State *s)
{
    uint64_t bah = s->mac_reg[RDBAH];
    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;

    return (bah << 32) + bal;
}

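/*
 * Receive path: pad runts to the 60-byte minimum, enforce the LPE/VLAN
 * size limits, apply the receive filter, optionally strip the VLAN tag
 * into the descriptor's "special" field, then scatter the payload
 * across as many receive descriptors (rxbuf_size bytes each) as
 * needed, finally updating statistics and raising RXT0 (plus RXDMT0 or
 * RXO when appropriate).
 */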
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;

    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
                                                             + 14)));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
        set_ics(s, 0, E1000_ICS_RXO);
        return -1;
    }
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    s->mac_reg[GPRC]++;
    s->mac_reg[TPR]++;
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     */
    n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
    if (n < s->mac_reg[TORL])
        s->mac_reg[TORH]++;
    s->mac_reg[TORL] = n;

    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}

static ssize_t
e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    const struct iovec iov = {
        .iov_base = (uint8_t *)buf,
        .iov_len = size
    };

    return e1000_receive_iov(nc, &iov, 1);
}

static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}

static uint32_t
mac_icr_read(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[ICR];

    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
    set_interrupt_cause(s, 0, 0);
    return ret;
}

static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}

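/*
 * 64-bit statistics counters are kept as LOW/HIGH register pairs (e.g.
 * TORL/TORH).  Reading the high half returns it and clears both
 * halves, relying on the register enum placing the high word directly
 * after the low one.
 */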
static uint32_t
mac_read_clr8(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    s->mac_reg[index-1] = 0;
    return ret;
}

static void
mac_writereg(E1000State *s, int index, uint32_t val)
{
    uint32_t macaddr[2];

    s->mac_reg[index] = val;

    if (index == RA + 1) {
        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
    }
}

static void
set_rdt(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
    if (e1000_has_rxbufs(s, 1)) {
        qemu_flush_queued_packets(qemu_get_queue(s->nic));
    }
}

static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}

static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}

static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}

static void
set_icr(E1000State *s, int index, uint32_t val)
{
    DBGOUT(INTERRUPT, "set_icr %x\n", val);
    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
}

static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}

static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}

#define getreg(x)   [x] = mac_readreg
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),    getreg(RCTL),   getreg(TDH),    getreg(TXDCTL),
    getreg(WUFC),   getreg(TDT),    getreg(CTRL),   getreg(LEDCTL),
    getreg(MANC),   getreg(MDIC),   getreg(SWSM),   getreg(STATUS),
    getreg(TORL),   getreg(TOTL),   getreg(IMS),    getreg(TCTL),
    getreg(RDH),    getreg(RDT),    getreg(VET),    getreg(ICS),
    getreg(TDBAL),  getreg(TDBAH),  getreg(RDBAH),  getreg(RDBAL),
    getreg(TDLEN),  getreg(RDLEN),  getreg(RDTR),   getreg(RADV),
    getreg(TADV),   getreg(ITR),

    [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4,  [TPT] = mac_read_clr4,
    [ICR] = mac_icr_read,   [EECD] = get_eecd,      [EERD] = flash_eerd_read,
    [CRCERRS ... MPC] = &mac_readreg,
    [RA ... RA+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };

#define putreg(x)   [x] = mac_writereg
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),    putreg(EERD),   putreg(SWSM),   putreg(WUFC),
    putreg(TDBAL),  putreg(TDBAH),  putreg(TXDCTL), putreg(RDBAH),
    putreg(RDBAL),  putreg(LEDCTL), putreg(VET),
    [TDLEN] = set_dlen, [RDLEN] = set_dlen,      [TCTL] = set_tctl,
    [TDT] = set_tctl,   [MDIC] = set_mdic,       [ICS] = set_ics,
    [TDH] = set_16bit,  [RDH] = set_16bit,       [RDT] = set_rdt,
    [IMC] = set_imc,    [IMS] = set_ims,         [ICR] = set_icr,
    [EECD] = set_eecd,  [RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit, [RADV] = set_16bit,      [TADV] = set_16bit,
    [ITR] = set_16bit,
    [RA ... RA+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };

static void
e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                 unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NWRITEOPS && macreg_writeops[index]) {
        macreg_writeops[index](s, index, val);
    } else if (index < NREADOPS && macreg_readops[index]) {
        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
    } else {
        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
               index<<2, val);
    }
}

static uint64_t
e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    E1000State *s = opaque;
    unsigned int index = (addr & 0x1ffff) >> 2;

    if (index < NREADOPS && macreg_readops[index]) {
        return macreg_readops[index](s, index);
    }
    DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
    return 0;
}

static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

static uint64_t e1000_io_read(void *opaque, hwaddr addr,
                              unsigned size)
{
    E1000State *s = opaque;

    (void)s;
    return 0;
}

static void e1000_io_write(void *opaque, hwaddr addr,
                           uint64_t val, unsigned size)
{
    E1000State *s = opaque;

    (void)s;
}

static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};

static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}

static void e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* If the mitigation timer is active, emulate a timeout now. */
    if (s->mit_timer_on) {
        e1000_mit_timer(s);
    }

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }

    /*
     * If link is down and auto-negotiation is ongoing, complete
     * auto-negotiation immediately.  This allows us to look at
     * MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down &&
        s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }
}

static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    if (!(s->compat_flags & E1000_FLAG_MIT)) {
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return 0;
    }

    if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    return 0;
}

static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return s->compat_flags & E1000_FLAG_MIT;
}

static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField []) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};

static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};

/* PCI interface */

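/*
 * Register the 128 KB MMIO region.  Most of it is marked for MMIO
 * write coalescing to batch register writes; the registers listed in
 * excluded_regs (interrupt and doorbell registers whose writes must
 * take effect immediately) are carved out of the coalesced ranges.
 */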
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}

static void
e1000_cleanup(NetClientState *nc)
{
    E1000State *s = qemu_get_nic_opaque(nc);

    s->nic = NULL;
}

static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    memory_region_destroy(&d->mmio);
    memory_region_destroy(&d->io);
    qemu_del_nic(d->nic);
}

static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};

static int pci_e1000_init(PCIDevice *pci_dev)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    uint8_t *pci_conf;
    uint16_t checksum = 0;
    int i;
    uint8_t *macaddr;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    memmove(d->eeprom_data, e1000_eeprom_template,
            sizeof e1000_eeprom_template);
    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;
    for (i = 0; i < 3; i++)
        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
        checksum += d->eeprom_data[i];
    checksum = (uint16_t) EEPROM_SUM - checksum;
    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);

    return 0;
}

static void qdev_e1000_reset(DeviceState *dev)
{
    E1000State *d = E1000(dev);
    e1000_reset(d);
}

static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};

static void e1000_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->init = pci_e1000_init;
    k->exit = pci_e1000_uninit;
    k->romfile = "efi-e1000.rom";
    k->vendor_id = PCI_VENDOR_ID_INTEL;
    k->device_id = E1000_DEVID;
    k->revision = 0x03;
    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    dc->desc = "Intel Gigabit Ethernet";
    dc->reset = qdev_e1000_reset;
    dc->vmsd = &vmstate_e1000;
    dc->props = e1000_properties;
}

static const TypeInfo e1000_info = {
    .name          = TYPE_E1000,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .class_init    = e1000_class_init,
};

static void e1000_register_types(void)
{
    type_register_static(&e1000_info);
}

type_init(e1000_register_types)