]> git.proxmox.com Git - mirror_qemu.git/blob - hw/net/e1000.c
Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
[mirror_qemu.git] / hw / net / e1000.c
1 /*
2 * QEMU e1000 emulation
3 *
4 * Software developer's manual:
5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
6 *
7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8 * Copyright (c) 2008 Qumranet
9 * Based on work done by:
10 * Copyright (c) 2007 Dan Aloni
11 * Copyright (c) 2004 Antony T Curtis
12 *
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version.
17 *
18 * This library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
25 */
26
27
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
37
38 #include "e1000_regs.h"
39
/* Ethernet broadcast destination address; used for RX filtering and for
 * classifying transmitted frames in the statistics counters. */
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

#define E1000_DEBUG

#ifdef E1000_DEBUG
/* One bit per debug topic; select topics via the debugflags mask below. */
enum {
    DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
    DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
    DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET,
};
#define DBGBIT(x) (1<<DEBUG_##x)
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print to stderr only when the topic's bit is set in debugflags. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

/* 14-byte Ethernet header plus a 4-byte 802.1Q VLAN tag */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
72
73 /*
74 * HW models:
75 * E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
76 * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
77 * E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
78 * Others never tested
79 */
80
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;              /* register MMIO region */
    MemoryRegion io;                /* register I/O-port region */

    uint32_t mac_reg[0x8000];       /* MAC registers, indexed by offset >> 2 */
    uint16_t phy_reg[0x20];         /* MII PHY registers */
    uint16_t eeprom_data[64];       /* EEPROM contents as 16-bit words */

    uint32_t rxbuf_size;            /* per-descriptor RX buffer size (RCTL) */
    uint32_t rxbuf_min_shift;
    /* Transmit state accumulated across context/data descriptors. */
    struct e1000_tx {
        unsigned char header[256];      /* saved frame header for TSO */
        unsigned char vlan_header[4];   /* EtherType + tag to re-insert */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;              /* bytes currently staged in data[] */
        unsigned char sum_needed;   /* requested TXSM/IXSM checksum offloads */
        unsigned char vlan_needed;
        /* Checksum offsets latched from the last context descriptor. */
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;        /* segments emitted so far for this TSO */
        char tse;                   /* TSE bit from the context descriptor */
        int8_t ip;                  /* nonzero = IPv4, zero = IPv6 */
        int8_t tcp;                 /* nonzero = TCP, zero = UDP */
        char cptse;                 /* current packet tse bit */
    } tx;

    /* Microwire EEPROM bit-bang state machine (see set_eecd/get_eecd). */
    struct {
        uint32_t val_in;    /* shifted in from guest driver */
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
    uint32_t compat_flags;
} E1000State;
146
/* Test a migration-compatibility flag; expects an E1000State *s in scope. */
#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)

/* Class data: PHY ID2 differs per emulated device model and is applied
 * to phy_reg[PHY_ID2] from e1000_reset(). */
typedef struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;
} E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)

#define E1000_DEVICE_CLASS(klass) \
     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
#define E1000_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
163
/*
 * Symbolic indices into mac_reg[]: each E1000_<reg> byte offset from
 * e1000_regs.h divided by 4, since registers are 32 bits wide.
 */
#define defreg(x)    x = (E1000_##x>>2)
enum {
    defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
    defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
    defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
    defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
    defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
    defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
    defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
    defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
    defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
    defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
    defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
    defreg(ITR),     defreg(FCRUC),   defreg(TDFH),    defreg(TDFT),
    defreg(TDFHS),   defreg(TDFTS),   defreg(TDFPC),   defreg(RDFH),
    defreg(RDFT),    defreg(RDFHS),   defreg(RDFTS),   defreg(RDFPC),
    defreg(IPAV),    defreg(WUC),     defreg(WUS),     defreg(AIT),
    defreg(IP6AT),   defreg(IP4AT),   defreg(FFLT),    defreg(FFMT),
    defreg(FFVT),    defreg(WUPM),    defreg(PBM),     defreg(SCC),
    defreg(ECOL),    defreg(MCC),     defreg(LATECOL), defreg(COLC),
    defreg(DC),      defreg(TNCRS),   defreg(SEC),     defreg(CEXTERR),
    defreg(RLEC),    defreg(XONRXC),  defreg(XONTXC),  defreg(XOFFRXC),
    defreg(XOFFTXC), defreg(RFC),     defreg(RJC),     defreg(RNBC),
    defreg(TSCTFC),  defreg(MGTPRC),  defreg(MGTPDC),  defreg(MGTPTC),
    defreg(RUC),     defreg(ROC),     defreg(GORCL),   defreg(GORCH),
    defreg(GOTCL),   defreg(GOTCH),   defreg(BPRC),    defreg(MPRC),
    defreg(TSCTC),   defreg(PRC64),   defreg(PRC127),  defreg(PRC255),
    defreg(PRC511),  defreg(PRC1023), defreg(PRC1522), defreg(PTC64),
    defreg(PTC127),  defreg(PTC255),  defreg(PTC511),  defreg(PTC1023),
    defreg(PTC1522), defreg(MPTC),    defreg(BPTC)
};
195
196 static void
197 e1000_link_down(E1000State *s)
198 {
199 s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
200 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
201 s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
202 s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
203 }
204
205 static void
206 e1000_link_up(E1000State *s)
207 {
208 s->mac_reg[STATUS] |= E1000_STATUS_LU;
209 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
210
211 /* E1000_STATUS_LU is tested by e1000_can_receive() */
212 qemu_flush_queued_packets(qemu_get_queue(s->nic));
213 }
214
215 static bool
216 have_autoneg(E1000State *s)
217 {
218 return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
219 }
220
221 static void
222 set_phy_ctrl(E1000State *s, int index, uint16_t val)
223 {
224 /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
225 s->phy_reg[PHY_CTRL] = val & ~(0x3f |
226 MII_CR_RESET |
227 MII_CR_RESTART_AUTO_NEG);
228
229 /*
230 * QEMU 1.3 does not support link auto-negotiation emulation, so if we
231 * migrate during auto negotiation, after migration the link will be
232 * down.
233 */
234 if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
235 e1000_link_down(s);
236 DBGOUT(PHY, "Start link auto negotiation\n");
237 timer_mod(s->autoneg_timer,
238 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
239 }
240 }
241
/* Per-register PHY write hooks; registers without an entry are stored
 * verbatim by set_mdic(). */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };

/* Read/write capability of each of the 32 PHY registers; set_mdic()
 * reports E1000_MDIC_ERROR for accesses the table does not permit. */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
258
/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
/* PHY register reset values; registers not listed reset to zero. */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
286
/* MAC register reset values; registers not listed reset to zero. */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
300
/*
 * Minimum-tracking accumulator for mitigation delays.  *curr holds the
 * smallest non-zero candidate seen so far; 0 means "not set yet".
 * A zero candidate is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;                 /* zero is "no delay requested" */
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
309
/*
 * Set ICR (and its ICS mirror) to 'val' and recompute the INTx pin level,
 * postponing a rising edge while the interrupt-mitigation window is open.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    /* Only causes that are both raised and unmasked can assert the pin. */
    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;
        }
        if (chkflag(MIT)) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            if (mit_delay) {
                /* Open a new mitigation window of mit_delay * 256ns. */
                s->mit_timer_on = 1;
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
371
372 static void
373 e1000_mit_timer(void *opaque)
374 {
375 E1000State *s = opaque;
376
377 s->mit_timer_on = 0;
378 /* Call set_interrupt_cause to update the irq level (if necessary). */
379 set_interrupt_cause(s, 0, s->mac_reg[ICR]);
380 }
381
382 static void
383 set_ics(E1000State *s, int index, uint32_t val)
384 {
385 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
386 s->mac_reg[IMS]);
387 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
388 }
389
390 static void
391 e1000_autoneg_timer(void *opaque)
392 {
393 E1000State *s = opaque;
394 if (!qemu_get_queue(s->nic)->link_down) {
395 e1000_link_up(s);
396 s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
397 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
398 DBGOUT(PHY, "Auto negotiation is completed\n");
399 set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
400 }
401 }
402
403 static int
404 rxbufsize(uint32_t v)
405 {
406 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
407 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
408 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
409 switch (v) {
410 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
411 return 16384;
412 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
413 return 8192;
414 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
415 return 4096;
416 case E1000_RCTL_SZ_1024:
417 return 1024;
418 case E1000_RCTL_SZ_512:
419 return 512;
420 case E1000_RCTL_SZ_256:
421 return 256;
422 }
423 return 2048;
424 }
425
/*
 * Device reset: stop pending timers, restore PHY/MAC registers to their
 * power-on defaults, clear the TX state machine and pre-load the station
 * address into RAL/RAH.
 */
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    /* The init tables are shorter than the register arrays, so zero the
     * arrays first and then copy the defaults over the front. */
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    d->phy_reg[PHY_ID2] = edc->phy_id2;  /* per-device-model PHY ID */
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Throttle interrupts to prevent guest (e.g Win 2012) from
     * reinjecting interrupts endlessly. TODO: fix non ITR case.
     */
    d->mac_reg[ITR] = 250;

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
}
464
465 static void
466 set_ctrl(E1000State *s, int index, uint32_t val)
467 {
468 /* RST is self clearing */
469 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
470 }
471
472 static void
473 set_rx_control(E1000State *s, int index, uint32_t val)
474 {
475 s->mac_reg[RCTL] = val;
476 s->rxbuf_size = rxbufsize(val);
477 s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
478 DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
479 s->mac_reg[RCTL]);
480 qemu_flush_queued_packets(qemu_get_queue(s->nic));
481 }
482
/*
 * MDIC write: perform the requested MDIO read/write against the emulated
 * PHY.  Only PHY address 1 exists; reads/writes to registers lacking the
 * corresponding phy_regcap permission set the ERROR bit.  READY is always
 * reported, and MDAC is raised when the guest enabled the completion
 * interrupt.
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* clear the data field, then merge in the PHY register value */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* Registers with a write hook (e.g. PHY_CTRL) get it invoked;
             * all others are plain storage. */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            } else {
                s->phy_reg[addr] = data;
            }
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
517
/*
 * EECD read: report EEPROM present / access granted, echo back the last
 * written control bits, and drive the DO (data out) line from the EEPROM
 * word/bit currently addressed by the Microwire state machine (MSB of
 * each 16-bit word first).  DO idles high when no read is in progress.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /* bitnum_out selects word (high bits) and bit within the word; the
     * "^ 0xf" makes bit 0 of the counter address the word's MSB. */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
531
/*
 * EECD write: bit-bang interface to the Microwire EEPROM.  Tracks chip
 * select and clock edges; data-in bits are shifted into val_in, and after
 * 9 bits (start bit, 3-bit opcode tail, 6-bit address) a READ command
 * positions the output bit counter at the addressed word.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
        return;
    }
    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval))) { /* no clock edge */
        return;
    }
    if (!(E1000_EECD_SK & val)) {            /* falling edge */
        s->eecd_state.bitnum_out++;          /* advance the output bit */
        return;
    }
    /* rising clock edge: latch DI into the input shift register */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /* command complete: point bitnum_out at the addressed word
         * (minus one, since the next falling edge pre-increments) and
         * check the opcode for a Microwire READ */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
567
568 static uint32_t
569 flash_eerd_read(E1000State *s, int x)
570 {
571 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
572
573 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
574 return (s->mac_reg[EERD]);
575
576 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
577 return (E1000_EEPROM_RW_REG_DONE | r);
578
579 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
580 E1000_EEPROM_RW_REG_DONE | r);
581 }
582
/*
 * Compute an Internet checksum over data[css .. n-1] and store it
 * big-endian at data[sloc].  A non-zero checksum-end offset (cse) inside
 * the buffer truncates the summed range to data[css .. cse].
 *
 * Fix: the original bound check "sloc < n-1" underflows when n == 0
 * (uint32_t arithmetic makes n-1 == 0xffffffff), and css was never
 * checked against n; either case would make net_checksum_add() walk far
 * outside the packet buffer.  Require a buffer of at least 2 bytes and
 * a start offset inside it before summing.
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n)
        n = cse + 1;
    /* need room for the 2-byte checksum and a valid summing range */
    if (n >= 2 && sloc < n - 1 && css < n) {
        sum = net_checksum_add(n - css, data + css);
        stw_be_p(data + sloc, net_checksum_finish(sum));
    }
}
595
596 static inline void
597 inc_reg_if_not_full(E1000State *s, int index)
598 {
599 if (s->mac_reg[index] != 0xffffffff) {
600 s->mac_reg[index]++;
601 }
602 }
603
604 static inline void
605 inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
606 {
607 if (!memcmp(arr, bcast, sizeof bcast)) {
608 inc_reg_if_not_full(s, BPTC);
609 } else if (arr[0] & 1) {
610 inc_reg_if_not_full(s, MPTC);
611 }
612 }
613
614 static void
615 grow_8reg_if_not_full(E1000State *s, int index, int size)
616 {
617 uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32;
618
619 if (sum + size < sum) {
620 sum = ~0ULL;
621 } else {
622 sum += size;
623 }
624 s->mac_reg[index] = sum;
625 s->mac_reg[index+1] = sum >> 32;
626 }
627
628 static void
629 increase_size_stats(E1000State *s, const int *size_regs, int size)
630 {
631 if (size > 1023) {
632 inc_reg_if_not_full(s, size_regs[5]);
633 } else if (size > 511) {
634 inc_reg_if_not_full(s, size_regs[4]);
635 } else if (size > 255) {
636 inc_reg_if_not_full(s, size_regs[3]);
637 } else if (size > 127) {
638 inc_reg_if_not_full(s, size_regs[2]);
639 } else if (size > 64) {
640 inc_reg_if_not_full(s, size_regs[1]);
641 } else if (size == 64) {
642 inc_reg_if_not_full(s, size_regs[0]);
643 }
644 }
645
646 static inline int
647 vlan_enabled(E1000State *s)
648 {
649 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
650 }
651
652 static inline int
653 vlan_rx_filter_enabled(E1000State *s)
654 {
655 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
656 }
657
658 static inline int
659 is_vlan_packet(E1000State *s, const uint8_t *buf)
660 {
661 return (be16_to_cpup((uint16_t *)(buf + 12)) ==
662 le16_to_cpu(s->mac_reg[VET]));
663 }
664
665 static inline int
666 is_vlan_txd(uint32_t txd_lower)
667 {
668 return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
669 }
670
671 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
672 * fill it in, just pad descriptor length by 4 bytes unless guest
673 * told us to strip it off the packet. */
674 static inline int
675 fcs_len(E1000State *s)
676 {
677 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
678 }
679
680 static void
681 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
682 {
683 static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
684 PTC1023, PTC1522 };
685
686 NetClientState *nc = qemu_get_queue(s->nic);
687 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
688 nc->info->receive(nc, buf, size);
689 } else {
690 qemu_send_packet(nc, buf, size);
691 }
692 inc_tx_bcast_or_mcast_count(s, buf);
693 increase_size_stats(s, PTCregs, size);
694 }
695
/*
 * Finalize and transmit the frame currently staged in s->tx.  For an
 * active TSO segment this patches the per-segment IP length/ID and the
 * TCP sequence number/flags, and folds the segment length into the TCP
 * pseudo-header checksum.  Requested TCP/UDP and IP checksums are then
 * inserted, the VLAN tag is re-inserted when needed, and the transmit
 * statistics counters are updated.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {       /* IPv4 */
            /* total length, and an IP identification unique per segment */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else {            /* IPv6 */
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* advance the sequence number by the payload already sent */
            sofar = frames * tp->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (tp->paylen - sofar > tp->mss) {
                tp->data[css + 13] &= ~9;    /* PSH, FIN cleared mid-burst */
            } else if (frames) {
                inc_reg_if_not_full(s, TSCTC);
            }
        } else    /* UDP */
            stw_be_p(tp->data+css+4, len);
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            /* add pseudo-header length before checksum calculation */
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /* Re-insert the 4-byte 802.1Q tag after the MAC addresses.  The
         * vlan[] field immediately precedes data[] in the struct, so the
         * tagged frame starts at tp->vlan. */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else {
        e1000_send_packet(s, tp->data, tp->size);
    }

    /* good-packet/octet counters mirror the total counters here */
    inc_reg_if_not_full(s, TPT);
    grow_8reg_if_not_full(s, TOTL, s->tx.size);
    s->mac_reg[GPTC] = s->mac_reg[TPT];
    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
}
757
/*
 * Consume one transmit descriptor.  Context descriptors only latch the
 * checksum/TSO offload parameters into s->tx.  Data and legacy
 * descriptors have their buffers DMA'd into the tx.data staging buffer;
 * with TSO active a frame is emitted every time header + MSS bytes have
 * accumulated, otherwise a single frame is emitted when EOP is seen.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    /* remember the IDE bit for interrupt mitigation (TADV) */
    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {    /* this is probably wrong */
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        /* data descriptor */
        if (tp->size == 0) {
            /* POPTS (checksum offload requests) only from the first one */
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else {
        /* legacy descriptor */
        tp->cptse = 0;
    }

    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        /* latch EtherType and tag for later insertion by xmit_seg() */
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                 le16_to_cpu(s->mac_reg[VET]));
        stw_be_p(tp->vlan_header + 2,
                 le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        /* TSO: emit a frame each time header + MSS bytes are staged */
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                /* header fully staged: save it for later segments */
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                /* full segment: send it and restart after the header */
                xmit_seg(s);
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
            split_size -= bytes;
        } while (bytes && split_size);
    } else if (!tp->tse && tp->cptse) {
        /* context descriptor TSE is not set, while data descriptor TSE is set */
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        /* non-TSO: just accumulate the buffer, clipped to the staging area */
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* end of packet: transmit unless TSO never even staged a full header */
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
854
855 static uint32_t
856 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
857 {
858 PCIDevice *d = PCI_DEVICE(s);
859 uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
860
861 if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
862 return 0;
863 txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
864 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
865 dp->upper.data = cpu_to_le32(txd_upper);
866 pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
867 &dp->upper, sizeof(dp->upper));
868 return E1000_ICR_TXDW;
869 }
870
871 static uint64_t tx_desc_base(E1000State *s)
872 {
873 uint64_t bah = s->mac_reg[TDBAH];
874 uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
875
876 return (bah << 32) + bal;
877 }
878
/*
 * Transmit path: walk the TX descriptor ring from TDH toward TDT,
 * processing and writing back each descriptor, then raise the
 * accumulated interrupt causes.  The walk aborts if TDH wraps all the
 * way back to where it started (bogus guest TDT/TDLEN programming).
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* advance TDH, wrapping at the end of the ring (TDLEN bytes) */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
919
/*
 * RX filtering; returns nonzero when the frame should be accepted.
 * Checks, in order: VLAN filter (VFTA) when enabled, promiscuous
 * unicast/multicast (UPE/MPE), broadcast acceptance (BAM), exact
 * unicast match against the RAL/RAH address slots, and finally the
 * multicast hash table (MTA).
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    /* MO field selects which 12 bits of the address feed the MTA hash */
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        /* look up the VLAN id (after the tag's EtherType) in VFTA */
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
        return 1;
    }

    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
        inc_reg_if_not_full(s, MPRC);
        return 1;
    }

    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
        inc_reg_if_not_full(s, BPRC);
        return 1;
    }

    /* exact unicast match against valid (AV) RAL/RAH pairs */
    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    /* multicast hash: 12 address bits (selected by MO) index the MTA */
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
        inc_reg_if_not_full(s, MPRC);
        return 1;
    }
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
979
980 static void
981 e1000_set_link_status(NetClientState *nc)
982 {
983 E1000State *s = qemu_get_nic_opaque(nc);
984 uint32_t old_status = s->mac_reg[STATUS];
985
986 if (nc->link_down) {
987 e1000_link_down(s);
988 } else {
989 if (have_autoneg(s) &&
990 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
991 /* emulate auto-negotiation if supported */
992 timer_mod(s->autoneg_timer,
993 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
994 } else {
995 e1000_link_up(s);
996 }
997 }
998
999 if (s->mac_reg[STATUS] != old_status)
1000 set_ics(s, 0, E1000_ICR_LSC);
1001 }
1002
1003 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
1004 {
1005 int bufs;
1006 /* Fast-path short packets */
1007 if (total_size <= s->rxbuf_size) {
1008 return s->mac_reg[RDH] != s->mac_reg[RDT];
1009 }
1010 if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
1011 bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
1012 } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
1013 bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
1014 s->mac_reg[RDT] - s->mac_reg[RDH];
1015 } else {
1016 return false;
1017 }
1018 return total_size <= bufs * s->rxbuf_size;
1019 }
1020
1021 static int
1022 e1000_can_receive(NetClientState *nc)
1023 {
1024 E1000State *s = qemu_get_nic_opaque(nc);
1025
1026 return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
1027 (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
1028 (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
1029 e1000_has_rxbufs(s, 1);
1030 }
1031
1032 static uint64_t rx_desc_base(E1000State *s)
1033 {
1034 uint64_t bah = s->mac_reg[RDBAH];
1035 uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
1036
1037 return (bah << 32) + bal;
1038 }
1039
/*
 * Receive one packet, presented as a scatter/gather list, into the
 * guest's RX descriptor ring via DMA.
 *
 * Returns the packet size when the packet was consumed or deliberately
 * dropped (filtered, oversized), or -1 to ask the net layer to requeue
 * it (link down, RX disabled, or no room in the descriptor ring).
 */
static ssize_t
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
    E1000State *s = qemu_get_nic_opaque(nc);
    PCIDevice *d = PCI_DEVICE(s);
    struct e1000_rx_desc desc;
    dma_addr_t base;
    unsigned int n, rdt;
    uint32_t rdh_start;
    uint16_t vlan_special = 0;
    uint8_t vlan_status = 0;
    uint8_t min_buf[MIN_BUF_SIZE];
    struct iovec min_iov;
    /* filter_buf must point at a contiguous copy of (at least) the
     * Ethernet header for receive_filter()/is_vlan_packet(). */
    uint8_t *filter_buf = iov->iov_base;
    size_t size = iov_size(iov, iovcnt);
    size_t iov_ofs = 0;
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;
    static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
                                    PRC1023, PRC1522 };

    if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
        return -1;
    }

    if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
        return -1;
    }

    /* Pad to minimum Ethernet frame length */
    if (size < sizeof(min_buf)) {
        iov_to_buf(iov, iovcnt, 0, min_buf, size);
        memset(&min_buf[size], 0, sizeof(min_buf) - size);
        inc_reg_if_not_full(s, RUC);
        min_iov.iov_base = filter_buf = min_buf;
        min_iov.iov_len = size = sizeof(min_buf);
        iovcnt = 1;
        iov = &min_iov;
    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
        /* This is very unlikely, but may happen. */
        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
        filter_buf = min_buf;
    }

    /* Discard oversized packets if !LPE and !SBP. */
    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
        (size > MAXIMUM_ETHERNET_VLAN_SIZE
        && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
        && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
        inc_reg_if_not_full(s, ROC);
        return size;
    }

    if (!receive_filter(s, filter_buf, size)) {
        return size;
    }

    /* Strip the 802.1Q tag: save the TCI for the descriptor's
     * "special" field and close the 4-byte gap by moving the two MAC
     * addresses (12 bytes) forward. */
    if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
        vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
                                                                + 14)));
        iov_ofs = 4;
        if (filter_buf == iov->iov_base) {
            memmove(filter_buf + 4, filter_buf, 12);
        } else {
            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
            /* Skip fully-consumed iovec entries. */
            while (iov->iov_len <= iov_ofs) {
                iov_ofs -= iov->iov_len;
                iov++;
            }
        }
        vlan_status = E1000_RXD_STAT_VP;
        size -= 4;
    }

    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + fcs_len(s);
    if (!e1000_has_rxbufs(s, total_size)) {
        set_ics(s, 0, E1000_ICS_RXO);
        return -1;
    }
    /* Spread the packet over as many descriptors as needed, at most
     * rxbuf_size bytes per descriptor. */
    do {
        desc_size = total_size - desc_offset;
        if (desc_size > s->rxbuf_size) {
            desc_size = s->rxbuf_size;
        }
        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
        pci_dma_read(d, base, &desc, sizeof(desc));
        desc.special = vlan_special;
        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
        if (desc.buffer_addr) {
            if (desc_offset < size) {
                size_t iov_copy;
                hwaddr ba = le64_to_cpu(desc.buffer_addr);
                size_t copy_size = size - desc_offset;
                if (copy_size > s->rxbuf_size) {
                    copy_size = s->rxbuf_size;
                }
                /* DMA this descriptor's chunk, walking the iovec. */
                do {
                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
                    copy_size -= iov_copy;
                    ba += iov_copy;
                    iov_ofs += iov_copy;
                    if (iov_ofs == iov->iov_len) {
                        iov++;
                        iov_ofs = 0;
                    }
                } while (copy_size);
            }
            desc_offset += desc_size;
            desc.length = cpu_to_le16(desc_size);
            if (desc_offset >= total_size) {
                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
            } else {
                /* Guest zeroing out status is not a hardware requirement.
                   Clear EOP in case guest didn't do it. */
                desc.status &= ~E1000_RXD_STAT_EOP;
            }
        } else { // as per intel docs; skip descriptors with null buf addr
            DBGOUT(RX, "Null RX descriptor!!\n");
        }
        pci_dma_write(d, base, &desc, sizeof(desc));

        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
            s->mac_reg[RDH] = 0;
        /* see comment in start_xmit; same here */
        if (s->mac_reg[RDH] == rdh_start) {
            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
            set_ics(s, 0, E1000_ICS_RXO);
            return -1;
        }
    } while (desc_offset < total_size);

    /* Update statistics registers. */
    increase_size_stats(s, PRCregs, total_size);
    inc_reg_if_not_full(s, TPR);
    s->mac_reg[GPRC] = s->mac_reg[TPR];
    /* TOR - Total Octets Received:
     * This register includes bytes received in a packet from the <Destination
     * Address> field through the <CRC> field, inclusively.
     * Always include FCS length (4) in size.
     */
    grow_8reg_if_not_full(s, TORL, size+4);
    s->mac_reg[GORCL] = s->mac_reg[TORL];
    s->mac_reg[GORCH] = s->mac_reg[TORH];

    /* Always signal RXT0; add RXDMT0 when the number of free
     * descriptors falls below the configured threshold. */
    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
        rdt += s->mac_reg[RDLEN] / sizeof(desc);
    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
        s->rxbuf_min_shift)
        n |= E1000_ICS_RXDMT0;

    set_ics(s, 0, n);

    return size;
}
1199
1200 static ssize_t
1201 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1202 {
1203 const struct iovec iov = {
1204 .iov_base = (uint8_t *)buf,
1205 .iov_len = size
1206 };
1207
1208 return e1000_receive_iov(nc, &iov, 1);
1209 }
1210
/* Plain MAC register read with no side effects. */
static uint32_t
mac_readreg(E1000State *s, int index)
{
    return s->mac_reg[index];
}
1216
1217 static uint32_t
1218 mac_low4_read(E1000State *s, int index)
1219 {
1220 return s->mac_reg[index] & 0xf;
1221 }
1222
1223 static uint32_t
1224 mac_low11_read(E1000State *s, int index)
1225 {
1226 return s->mac_reg[index] & 0x7ff;
1227 }
1228
1229 static uint32_t
1230 mac_low13_read(E1000State *s, int index)
1231 {
1232 return s->mac_reg[index] & 0x1fff;
1233 }
1234
1235 static uint32_t
1236 mac_low16_read(E1000State *s, int index)
1237 {
1238 return s->mac_reg[index] & 0xffff;
1239 }
1240
1241 static uint32_t
1242 mac_icr_read(E1000State *s, int index)
1243 {
1244 uint32_t ret = s->mac_reg[ICR];
1245
1246 DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1247 set_interrupt_cause(s, 0, 0);
1248 return ret;
1249 }
1250
/* Read-to-clear 32-bit statistics register. */
static uint32_t
mac_read_clr4(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    return ret;
}
1259
/* Read-to-clear 64-bit statistics register: reading the high half
 * (at 'index') clears both it and the preceding low half. */
static uint32_t
mac_read_clr8(E1000State *s, int index)
{
    uint32_t ret = s->mac_reg[index];

    s->mac_reg[index] = 0;
    s->mac_reg[index-1] = 0;
    return ret;
}
1269
1270 static void
1271 mac_writereg(E1000State *s, int index, uint32_t val)
1272 {
1273 uint32_t macaddr[2];
1274
1275 s->mac_reg[index] = val;
1276
1277 if (index == RA + 1) {
1278 macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1279 macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1280 qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1281 }
1282 }
1283
1284 static void
1285 set_rdt(E1000State *s, int index, uint32_t val)
1286 {
1287 s->mac_reg[index] = val & 0xffff;
1288 if (e1000_has_rxbufs(s, 1)) {
1289 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1290 }
1291 }
1292
/* Write handler for registers whose upper 16 bits are ignored. */
static void
set_16bit(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xffff;
}
1298
/* Write handler for TDLEN/RDLEN: only bits [19:7] are writable, so
 * ring lengths are 128-byte granular. */
static void
set_dlen(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val & 0xfff80;
}
1304
/* Shared write handler for TCTL and TDT: store the value, clamp TDT
 * to its 16-bit width, and kick the transmitter. */
static void
set_tctl(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[index] = val;
    s->mac_reg[TDT] &= 0xffff;
    start_xmit(s);
}
1312
1313 static void
1314 set_icr(E1000State *s, int index, uint32_t val)
1315 {
1316 DBGOUT(INTERRUPT, "set_icr %x\n", val);
1317 set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1318 }
1319
/* Interrupt Mask Clear: disable the given causes, then re-evaluate
 * the interrupt line via a no-op set_ics(). */
static void
set_imc(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] &= ~val;
    set_ics(s, 0, 0);
}
1326
/* Interrupt Mask Set: enable the given causes, then re-evaluate the
 * interrupt line via a no-op set_ics(). */
static void
set_ims(E1000State *s, int index, uint32_t val)
{
    s->mac_reg[IMS] |= val;
    set_ics(s, 0, 0);
}
1333
#define getreg(x) [x] = mac_readreg
/* Read dispatch table, indexed by register offset / 4.  A NULL slot
 * means the register is unimplemented and reads log as "unknown".
 * mac_read_clr4/clr8 implement read-to-clear statistics registers;
 * mac_lowN_read mask off unimplemented high bits. */
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL),
    getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL),
    getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS),
    getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL),
    getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
    getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
    getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV),
    getreg(TADV), getreg(ITR), getreg(FCRUC), getreg(IPAV),
    getreg(WUC), getreg(WUS), getreg(SCC), getreg(ECOL),
    getreg(MCC), getreg(LATECOL), getreg(COLC), getreg(DC),
    getreg(TNCRS), getreg(SEC), getreg(CEXTERR), getreg(RLEC),
    getreg(XONRXC), getreg(XONTXC), getreg(XOFFRXC), getreg(XOFFTXC),
    getreg(RFC), getreg(RJC), getreg(RNBC), getreg(TSCTFC),
    getreg(MGTPRC), getreg(MGTPDC), getreg(MGTPTC), getreg(GORCL),
    getreg(GOTCL),

    [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8,
    [GOTCH] = mac_read_clr8, [GORCH] = mac_read_clr8,
    [PRC64] = mac_read_clr4, [PRC127] = mac_read_clr4,
    [PRC255] = mac_read_clr4, [PRC511] = mac_read_clr4,
    [PRC1023] = mac_read_clr4, [PRC1522] = mac_read_clr4,
    [PTC64] = mac_read_clr4, [PTC127] = mac_read_clr4,
    [PTC255] = mac_read_clr4, [PTC511] = mac_read_clr4,
    [PTC1023] = mac_read_clr4, [PTC1522] = mac_read_clr4,
    [GPRC] = mac_read_clr4, [GPTC] = mac_read_clr4,
    [TPT] = mac_read_clr4, [TPR] = mac_read_clr4,
    [RUC] = mac_read_clr4, [ROC] = mac_read_clr4,
    [BPRC] = mac_read_clr4, [MPRC] = mac_read_clr4,
    [TSCTC] = mac_read_clr4, [BPTC] = mac_read_clr4,
    [MPTC] = mac_read_clr4,
    [ICR] = mac_icr_read, [EECD] = get_eecd,
    [EERD] = flash_eerd_read,
    [RDFH] = mac_low13_read, [RDFT] = mac_low13_read,
    [RDFHS] = mac_low13_read, [RDFTS] = mac_low13_read,
    [RDFPC] = mac_low13_read,
    [TDFH] = mac_low11_read, [TDFT] = mac_low11_read,
    [TDFHS] = mac_low13_read, [TDFTS] = mac_low13_read,
    [TDFPC] = mac_low13_read,
    [AIT] = mac_low16_read,

    [CRCERRS ... MPC] = &mac_readreg,
    [IP6AT ... IP6AT+3] = &mac_readreg, [IP4AT ... IP4AT+6] = &mac_readreg,
    [FFLT ... FFLT+6] = &mac_low11_read,
    [RA ... RA+31] = &mac_readreg,
    [WUPM ... WUPM+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
    [FFMT ... FFMT+254] = &mac_low4_read,
    [FFVT ... FFVT+254] = &mac_readreg,
    [PBM ... PBM+16383] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1388
#define putreg(x) [x] = mac_writereg
/* Write dispatch table, indexed by register offset / 4.  A NULL slot
 * means writes are ignored (read-only or unimplemented register). */
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC),
    putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH),
    putreg(RDBAL), putreg(LEDCTL), putreg(VET), putreg(FCRUC),
    putreg(TDFH), putreg(TDFT), putreg(TDFHS), putreg(TDFTS),
    putreg(TDFPC), putreg(RDFH), putreg(RDFT), putreg(RDFHS),
    putreg(RDFTS), putreg(RDFPC), putreg(IPAV), putreg(WUC),
    putreg(WUS), putreg(AIT),

    [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl,
    [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics,
    [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
    [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr,
    [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
    [ITR] = set_16bit,

    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
    [FFLT ... FFLT+6] = &mac_writereg,
    [RA ... RA+31] = &mac_writereg,
    [WUPM ... WUPM+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
    [PBM ... PBM+16383] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1418
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };

#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
 * f - flag bits (up to 6 possible flags)
 * n - flag needed
 * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
    [RDTR]     = markflag(MIT),    [TADV]     = markflag(MIT),
    [RADV]     = markflag(MIT),    [ITR]      = markflag(MIT),

    [IPAV]     = markflag(MAC),    [WUC]      = markflag(MAC),
    [IP6AT]    = markflag(MAC),    [IP4AT]    = markflag(MAC),
    [FFVT]     = markflag(MAC),    [WUPM]     = markflag(MAC),
    [ECOL]     = markflag(MAC),    [MCC]      = markflag(MAC),
    [DC]       = markflag(MAC),    [TNCRS]    = markflag(MAC),
    [RLEC]     = markflag(MAC),    [XONRXC]   = markflag(MAC),
    [XOFFTXC]  = markflag(MAC),    [RFC]      = markflag(MAC),
    [TSCTFC]   = markflag(MAC),    [MGTPRC]   = markflag(MAC),
    [WUS]      = markflag(MAC),    [AIT]      = markflag(MAC),
    [FFLT]     = markflag(MAC),    [FFMT]     = markflag(MAC),
    [SCC]      = markflag(MAC),    [FCRUC]    = markflag(MAC),
    [LATECOL]  = markflag(MAC),    [COLC]     = markflag(MAC),
    [SEC]      = markflag(MAC),    [CEXTERR]  = markflag(MAC),
    [XONTXC]   = markflag(MAC),    [XOFFRXC]  = markflag(MAC),
    [RJC]      = markflag(MAC),    [RNBC]     = markflag(MAC),
    [MGTPDC]   = markflag(MAC),    [MGTPTC]   = markflag(MAC),
    [RUC]      = markflag(MAC),    [ROC]      = markflag(MAC),
    [GORCL]    = markflag(MAC),    [GORCH]    = markflag(MAC),
    [GOTCL]    = markflag(MAC),    [GOTCH]    = markflag(MAC),
    [BPRC]     = markflag(MAC),    [MPRC]     = markflag(MAC),
    [TSCTC]    = markflag(MAC),    [PRC64]    = markflag(MAC),
    [PRC127]   = markflag(MAC),    [PRC255]   = markflag(MAC),
    [PRC511]   = markflag(MAC),    [PRC1023]  = markflag(MAC),
    [PRC1522]  = markflag(MAC),    [PTC64]    = markflag(MAC),
    [PTC127]   = markflag(MAC),    [PTC255]   = markflag(MAC),
    [PTC511]   = markflag(MAC),    [PTC1023]  = markflag(MAC),
    [PTC1522]  = markflag(MAC),    [MPTC]     = markflag(MAC),
    [BPTC]     = markflag(MAC),

    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
};
1471
1472 static void
1473 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1474 unsigned size)
1475 {
1476 E1000State *s = opaque;
1477 unsigned int index = (addr & 0x1ffff) >> 2;
1478
1479 if (index < NWRITEOPS && macreg_writeops[index]) {
1480 if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1481 || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1482 if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1483 DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
1484 "It is not fully implemented.\n", index<<2);
1485 }
1486 macreg_writeops[index](s, index, val);
1487 } else { /* "flag needed" bit is set, but the flag is not active */
1488 DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
1489 index<<2);
1490 }
1491 } else if (index < NREADOPS && macreg_readops[index]) {
1492 DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
1493 index<<2, val);
1494 } else {
1495 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1496 index<<2, val);
1497 }
1498 }
1499
1500 static uint64_t
1501 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1502 {
1503 E1000State *s = opaque;
1504 unsigned int index = (addr & 0x1ffff) >> 2;
1505
1506 if (index < NREADOPS && macreg_readops[index]) {
1507 if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
1508 || (s->compat_flags & (mac_reg_access[index] >> 2))) {
1509 if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
1510 DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
1511 "It is not fully implemented.\n", index<<2);
1512 }
1513 return macreg_readops[index](s, index);
1514 } else { /* "flag needed" bit is set, but the flag is not active */
1515 DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
1516 index<<2);
1517 }
1518 } else {
1519 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1520 }
1521 return 0;
1522 }
1523
/* MMIO (BAR 0) accessors; all accesses are performed as aligned
 * 32-bit operations via .impl. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1533
1534 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1535 unsigned size)
1536 {
1537 E1000State *s = opaque;
1538
1539 (void)s;
1540 return 0;
1541 }
1542
1543 static void e1000_io_write(void *opaque, hwaddr addr,
1544 uint64_t val, unsigned size)
1545 {
1546 E1000State *s = opaque;
1547
1548 (void)s;
1549 }
1550
/* I/O BAR (BAR 1) accessors; currently stubs (see e1000_io_read and
 * e1000_io_write above). */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1556
/* VMSTATE_UNUSED_TEST predicate: true only for a version 1 stream. */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id == 1) {
        return true;
    }
    return false;
}
1561
1562 static void e1000_pre_save(void *opaque)
1563 {
1564 E1000State *s = opaque;
1565 NetClientState *nc = qemu_get_queue(s->nic);
1566
1567 /* If the mitigation timer is active, emulate a timeout now. */
1568 if (s->mit_timer_on) {
1569 e1000_mit_timer(s);
1570 }
1571
1572 /*
1573 * If link is down and auto-negotiation is supported and ongoing,
1574 * complete auto-negotiation immediately. This allows us to look
1575 * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1576 */
1577 if (nc->link_down && have_autoneg(s)) {
1578 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1579 }
1580 }
1581
1582 static int e1000_post_load(void *opaque, int version_id)
1583 {
1584 E1000State *s = opaque;
1585 NetClientState *nc = qemu_get_queue(s->nic);
1586
1587 if (!chkflag(MIT)) {
1588 s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1589 s->mac_reg[TADV] = 0;
1590 s->mit_irq_level = false;
1591 }
1592 s->mit_ide = 0;
1593 s->mit_timer_on = false;
1594
1595 /* nc.link_down can't be migrated, so infer link_down according
1596 * to link status bit in mac_reg[STATUS].
1597 * Alternatively, restart link negotiation if it was in progress. */
1598 nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1599
1600 if (have_autoneg(s) &&
1601 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1602 nc->link_down = false;
1603 timer_mod(s->autoneg_timer,
1604 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1605 }
1606
1607 return 0;
1608 }
1609
/* Migrate the mit_state subsection only when the "mitigation"
 * compat flag is enabled. */
static bool e1000_mit_state_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MIT);
}
1616
/* Migrate the full_mac_state subsection only when the
 * "extra_mac_registers" compat flag is enabled. */
static bool e1000_full_mac_needed(void *opaque)
{
    E1000State *s = opaque;

    return chkflag(MAC);
}
1623
/* Optional subsection carrying the interrupt mitigation registers and
 * IRQ level; gated by e1000_mit_state_needed(). */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_mit_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
1638
/* Optional subsection carrying the entire 0x8000-entry MAC register
 * file; gated by e1000_full_mac_needed(). */
static const VMStateDescription vmstate_e1000_full_mac_state = {
    .name = "e1000/full_mac_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = e1000_full_mac_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
        VMSTATE_END_OF_LIST()
    }
};
1649
/* Main migration description: PCI state, EEPROM/EECD engine state,
 * the in-flight TX context, and the subset of MAC registers that the
 * base stream has always carried.  Extra state travels in the
 * subsections listed at the bottom. */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_e1000_mit_state,
        &vmstate_e1000_full_mac_state,
        NULL
    }
};
1733
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_init().
 * Words 0-2 (MAC address) and the checksum word are also rewritten at
 * realize time; see pci_e1000_realize().
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1748
1749 /* PCI interface */
1750
1751 static void
1752 e1000_mmio_setup(E1000State *d)
1753 {
1754 int i;
1755 const uint32_t excluded_regs[] = {
1756 E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1757 E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1758 };
1759
1760 memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1761 "e1000-mmio", PNPMMIO_SIZE);
1762 memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1763 for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1764 memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1765 excluded_regs[i+1] - excluded_regs[i] - 4);
1766 memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1767 }
1768
1769 static void
1770 pci_e1000_uninit(PCIDevice *dev)
1771 {
1772 E1000State *d = E1000(dev);
1773
1774 timer_del(d->autoneg_timer);
1775 timer_free(d->autoneg_timer);
1776 timer_del(d->mit_timer);
1777 timer_free(d->mit_timer);
1778 qemu_del_nic(d->nic);
1779 }
1780
/* Callbacks wiring this device into the QEMU net layer. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .link_status_changed = e1000_set_link_status,
};
1789
1790 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1791 uint32_t val, int len)
1792 {
1793 E1000State *s = E1000(pci_dev);
1794
1795 pci_default_write_config(pci_dev, address, val, len);
1796
1797 if (range_covers_byte(address, len, PCI_COMMAND) &&
1798 (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1799 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1800 }
1801 }
1802
1803
/*
 * Realize the PCI device: set up config space and the two BARs,
 * populate the EEPROM (MAC address, device ID, checksum), create the
 * NIC backend and the auto-negotiation/mitigation timers.
 */
static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
{
    DeviceState *dev = DEVICE(pci_dev);
    E1000State *d = E1000(pci_dev);
    PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
    uint8_t *pci_conf;
    uint16_t checksum = 0;
    int i;
    uint8_t *macaddr;

    pci_dev->config_write = e1000_write_config;

    pci_conf = pci_dev->config;

    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;

    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */

    e1000_mmio_setup(d);

    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);

    /* Start from the template, then patch in the MAC address (words
     * 0-2, little-endian) and the variant's PCI device ID. */
    memmove(d->eeprom_data, e1000_eeprom_template,
        sizeof e1000_eeprom_template);
    qemu_macaddr_default_if_unset(&d->conf.macaddr);
    macaddr = d->conf.macaddr.a;
    for (i = 0; i < 3; i++)
        d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
    d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
    /* Recompute the checksum word so the words sum to EEPROM_SUM. */
    for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
        checksum += d->eeprom_data[i];
    checksum = (uint16_t) EEPROM_SUM - checksum;
    d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
                          object_get_typename(OBJECT(d)), dev->id, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
}
1849
1850 static void qdev_e1000_reset(DeviceState *dev)
1851 {
1852 E1000State *d = E1000(dev);
1853 e1000_reset(d);
1854 }
1855
/* User-visible properties; the three flag bits gate optional behavior
 * (auto-negotiation, interrupt mitigation, extra MAC registers) for
 * machine-type compatibility. */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
                    compat_flags, E1000_FLAG_MAC_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1866
/* Per-variant identification data passed as class_data to
 * e1000_class_init(). */
typedef struct E1000Info {
    const char *name;
    uint16_t   device_id;
    uint8_t    revision;
    uint16_t   phy_id2;
} E1000Info;
1873
/*
 * Class initializer shared by all variants; 'data' points at the
 * E1000Info entry carrying the variant's PCI IDs and PHY id.
 */
static void e1000_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
    E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
    const E1000Info *info = data;

    k->realize = pci_e1000_realize;
    k->exit = pci_e1000_uninit;
    k->romfile = "efi-e1000.rom";
    k->vendor_id = PCI_VENDOR_ID_INTEL;
    k->device_id = info->device_id;
    k->revision = info->revision;
    e->phy_id2 = info->phy_id2;
    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    dc->desc = "Intel Gigabit Ethernet";
    dc->reset = qdev_e1000_reset;
    dc->vmsd = &vmstate_e1000;
    dc->props = e1000_properties;
}
1895
1896 static void e1000_instance_init(Object *obj)
1897 {
1898 E1000State *n = E1000(obj);
1899 device_add_bootindex_property(obj, &n->conf.bootindex,
1900 "bootindex", "/ethernet-phy@0",
1901 DEVICE(n), NULL);
1902 }
1903
/* Abstract base type; concrete variants are registered from
 * e1000_devices[] in e1000_register_types(). */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
};
1912
/* The emulated device variants: one entry per registered QOM type. */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
1933
1934 static void e1000_register_types(void)
1935 {
1936 int i;
1937
1938 type_register_static(&e1000_base_info);
1939 for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1940 const E1000Info *info = &e1000_devices[i];
1941 TypeInfo type_info = {};
1942
1943 type_info.name = info->name;
1944 type_info.parent = TYPE_E1000_BASE;
1945 type_info.class_data = (void *)info;
1946 type_info.class_init = e1000_class_init;
1947 type_info.instance_init = e1000_instance_init;
1948
1949 type_register(&type_info);
1950 }
1951 }
1952
1953 type_init(e1000_register_types)