/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2012 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#include <linux/pm_runtime.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
	__stringify(BUILD) "-k"
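/* Note: the MAJ/MIN/BUILD macros that DRV_VERSION expands are defined just
 * above this excerpt and are elided here; __stringify() simply turns them
 * into the usual "<major>.<minor>.<build>-k" in-kernel version string. */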
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
static int igb_vlan_rx_add_vid(struct net_device *, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif
#ifdef CONFIG_PM
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *);
#endif
static int igb_resume(struct device *);
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif
static const struct dev_pm_ops igb_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
			igb_runtime_idle)
};
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
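/* Usage sketch (assuming the module is loaded by hand): passing the
 * parameter at load time, e.g. "modprobe igb max_vfs=2", asks the driver
 * to bring up two SR-IOV virtual functions per port during probe. */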
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	.driver.pm = &igb_pm_ops,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};
MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
		regs[2], regs[3]);
}
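/* For the per-queue registers handled in the switch above, igb_regdump()
 * prints all four queue instances on one line, e.g. (illustrative values)
 *   RDLEN[0-3]      00001000 00001000 00001000 00001000
 * while any other offset falls through to the single-value default case. */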
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	u16 i, n;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		pr_info("Device Name     state            trans_start      "
			"last_rx\n");
		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
			netdev->state, netdev->trans_start, netdev->last_rx);
	}
	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	pr_info(" Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		struct igb_tx_buffer *buffer_info;
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN  |
	 *   +--------------------------------------------------------------+
	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
	 */
	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		pr_info("------------------------------------\n");
		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		pr_info("------------------------------------\n");
		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
			"[bi->dma       ] leng  ntw timestamp        "
			"bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			const char *next_desc;
			struct igb_tx_buffer *buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, i);
			buffer_info = &tx_ring->tx_buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			if (i == tx_ring->next_to_use &&
			    i == tx_ring->next_to_clean)
				next_desc = " NTC/U";
			else if (i == tx_ring->next_to_use)
				next_desc = " NTU";
			else if (i == tx_ring->next_to_clean)
				next_desc = " NTC";
			else
				next_desc = "";

			pr_info("T [0x%03X]    %016llX %016llX %016llX"
				" %04X  %p %016llX %p%s\n", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb, next_desc);

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}
	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	pr_info("Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		pr_info(" %5d %5X %5X\n",
			n, rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS  |
	 *   | Checksum   Ident  |   |           |    | Type | Type  |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		pr_info("------------------------------------\n");
		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		pr_info("------------------------------------\n");
		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			const char *next_desc;
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

			if (i == rx_ring->next_to_use)
				next_desc = " NTU";
			else if (i == rx_ring->next_to_clean)
				next_desc = " NTC";
			else
				next_desc = "";

			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				pr_info("%s[0x%03X]  %016llX %016llX -------"
					"--------- %p%s\n", "RWB", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb, next_desc);
			} else {
				pr_info("%s[0x%03X]  %016llX %016llX %016llX"
					" %p%s\n", "R  ", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb, next_desc);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(
						  buffer_info->page_dma +
						  buffer_info->page_offset),
						PAGE_SIZE/2, true);
				}
			}
		}
	}

exit:
	return;
}
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type >= e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
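/* Sketch of the resulting bit layout: on pre-82580 parts shift is 0, so
 * SYSTIML fills stamp[31:0] and SYSTIMH fills stamp[63:32]; on 82580 and
 * newer both words are shifted up by IGB_82580_TSYNC_SHIFT, with the
 * SYSTIMR residue (read first to latch the pair) in the low-order bits. */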
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;

	pr_info("%s - version %s\n",
		igb_driver_string, igb_driver_version);

	pr_info("%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
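/* Worked example of the macro above: Q_IDX_82576(0) = 0, (1) = 8, (2) = 1,
 * (3) = 9, ... so consecutive logical queues alternate between the lower
 * and upper halves of the 82576's 16-entry queue register space, matching
 * the per-VF pairing described below (VF 0 owns queues 0 and 8, VF 1 owns
 * 1 and 9, and so on). */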
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
							       Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;
	int orig_node = adapter->node;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
		adapter->tx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

		/* On i350, loopback VLAN packets have the tag byte-swapped. */
		if (adapter->hw.mac.type == e1000_i350)
			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

		adapter->rx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	igb_cache_ring_register(adapter);

	return 0;

err:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_queues(adapter);

	return -ENOMEM;
}
/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset in IVAR, should be multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = array_rd32(E1000_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit */
	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

	array_wr32(E1000_IVAR0, index, ivar);
}
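/* Worked example (values derived from igb_assign_vector() below): on an
 * 82576, Rx queue 10 yields index = 10 & 0x7 = 2 and offset =
 * (10 & 0x8) << 1 = 16, so its vector lands in byte 2 of IVAR row 2; the
 * matching Tx cause would go at offset 16 + 8 = 24 of the same row. */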
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;
	u32 msixbm = 0;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/*
		 * 82576 uses a table that essentially consists of 2 columns
		 * with 8 rows.  The ordering is column-major so we use the
		 * lower 3 bits as the row index, and the 4th bit as the
		 * column offset.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue & 0x7,
				       (rx_queue & 0x8) << 1);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue & 0x7,
				       ((tx_queue & 0x8) << 1) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/*
		 * On 82580 and newer adapters the scheme is similar to 82576
		 * however instead of ordering column-major we have things
		 * ordered row-major.  So we traverse the table by using
		 * bit 0 as the column offset, and the remaining bits as the
		 * row index.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
			   E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
				E1000_GPIE_PBA | E1000_GPIE_EIAME |
				E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
			  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx.ring && q_vector->tx.ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
				q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
				  igb_msix_ring, 0, q_vector->name,
				  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
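/* Vector accounting sketch for the function above: with rss_queues = 4 and
 * IGB_FLAG_QUEUE_PAIRS set, Tx and Rx share q_vectors, so numvecs is 4 for
 * the queues plus 1 for link status = 5 MSI-X entries requested; with
 * unpaired queues it would be 4 + 4 + 1 = 9. */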
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;
	int orig_node = adapter->node;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
						adapter->num_tx_queues)) &&
		    (adapter->num_rx_queues == v_idx))
			adapter->node = orig_node;
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
					adapter->node);
		if (!q_vector)
			q_vector = kzalloc(sizeof(struct igb_q_vector),
					   GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	return 0;

err_out:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx.ring = adapter->rx_ring[ring_idx];
	q_vector->rx.ring->q_vector = q_vector;
	q_vector->rx.count++;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx.ring = adapter->tx_ring[ring_idx];
	q_vector->tx.ring->q_vector = q_vector;
	q_vector->tx.count++;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx.work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
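/* Mapping sketch: with 4 Rx and 4 Tx queues but only 4 q_vectors, the else
 * branch above pairs Tx ring i and Rx ring i on vector i; with 8 q_vectors
 * available, each ring instead gets a dedicated vector (Rx first, then Tx). */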
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	}

	igb_assign_vector(adapter->q_vector[0], 0);

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++)
			free_irq(adapter->msix_entries[vector++].vector,
				 adapter->q_vector[i]);
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
	igb_reset_phy(&adapter->hw);
}
/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 **/
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_disable(&(adapter->q_vector[i]->napi));

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset*/
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;
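	/* Worked example of the watermark math above, assuming the 82575
	 * default pba of 34 KB and a 1522-byte max frame: (34 << 10) = 34816,
	 * so hwm = min(34816 * 9 / 10, 34816 - 2 * 1522) =
	 * min(31334, 31772) = 31334; high_water rounds down to 31328
	 * (16-byte granularity) and low_water is 31312. */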
	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");

	igb_init_dmac(adapter, pba);
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static netdev_features_t igb_fix_features(struct net_device *netdev,
	netdev_features_t features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}

static int igb_set_features(struct net_device *netdev,
	netdev_features_t features)
{
	netdev_features_t changed = netdev->features ^ features;

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	return 0;
}
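/* Usage note: these two hooks back the ethtool offload toggles, so e.g.
 * "ethtool -K eth0 rxvlan off" first passes through igb_fix_features(),
 * which forces txvlan to match, and then reaches igb_set_features(), which
 * reprograms VLAN tag stripping via igb_vlan_mode(). */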
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u8 part_str[E1000_PBANUM_LENGTH];

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
			pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(&pdev->dev,
						    DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
					   IORESOURCE_MEM),
					   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;

	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

	/*
	 * features is initialized to 0 in allocation, it might have bits
	 * set by igb_sw_init so we should use an or instead of an
	 * assignment.
	 */
	netdev->features |= NETIF_F_SG |
			    NETIF_F_IP_CSUM |
			    NETIF_F_IPV6_CSUM |
			    NETIF_F_TSO |
			    NETIF_F_TSO6 |
			    NETIF_F_RXHASH |
			    NETIF_F_RXCSUM |
			    NETIF_F_HW_VLAN_RX |
			    NETIF_F_HW_VLAN_TX;

	/* copy netdev features into list of user selectable features */
	netdev->hw_features |= netdev->features;

	/* set this bit last since it cannot be part of hw_features */
	netdev->features |= NETIF_F_HW_VLAN_FILTER;

	netdev->vlan_features |= NETIF_F_TSO |
				 NETIF_F_TSO6 |
				 NETIF_F_IP_CSUM |
				 NETIF_F_IPV6_CSUM |
				 NETIF_F_SG;

	if (pci_using_dac) {
		netdev->features |= NETIF_F_HIGHDMA;
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}

	if (hw->mac.type >= e1000_82576) {
		netdev->hw_features |= NETIF_F_SCTP_CSUM;
		netdev->features |= NETIF_F_SCTP_CSUM;
	}

	netdev->priv_flags |= IFF_UNICAST_FLT;

	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (hw->nvm.ops.validate(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	setup_timer(&adapter->watchdog_timer, igb_watchdog,
		    (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
		    (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	igb_validate_mdi_setting(hw);

	/* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */
	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type >= e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
				 &eeprom_data);
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & eeprom_apme_mask)
		adapter->eeprom_wol |= E1000_WUFC_MAG;

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;
	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);

	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_register;

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

#ifdef CONFIG_IGB_DCA
	if (dca_add_requester(&pdev->dev) == 0) {
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
		dev_info(&pdev->dev, "DCA enabled\n");
		igb_setup_dca(adapter);
	}
#endif

	/* do hw tstamp init after resetting */
	igb_init_hw_timer(adapter);

	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
	/* print bus type/speed/width info */
	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
		 netdev->name,
		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
		   "unknown"),
		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
		   "unknown"),
		 netdev->dev_addr);

	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
	if (ret_val)
		strcpy(part_str, "Unknown");
	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
	dev_info(&pdev->dev,
		 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2115 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2116 adapter
->msix_entries
? "MSI-X" :
2117 (adapter
->flags
& IGB_FLAG_HAS_MSI
) ? "MSI" : "legacy",
2118 adapter
->num_rx_queues
, adapter
->num_tx_queues
);
2119 switch (hw
->mac
.type
) {
2121 igb_set_eee_i350(hw
);
2127 pm_runtime_put_noidle(&pdev
->dev
);
2131 igb_release_hw_control(adapter
);
2133 if (!igb_check_reset_block(hw
))
2136 if (hw
->flash_address
)
2137 iounmap(hw
->flash_address
);
2139 igb_clear_interrupt_scheme(adapter
);
2140 iounmap(hw
->hw_addr
);
2142 free_netdev(netdev
);
2144 pci_release_selected_regions(pdev
,
2145 pci_select_bars(pdev
, IORESOURCE_MEM
));
2148 pci_disable_device(pdev
);
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	pm_runtime_get_noresume(&pdev->dev);
	/*
	 * The watchdog timer may be rescheduled, so explicitly
	 * disable watchdog from being rescheduled.
	 */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	cancel_work_sync(&adapter->reset_task);
	cancel_work_sync(&adapter->watchdog_task);

#ifdef CONFIG_IGB_DCA
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
		if (!igb_check_vf_assignment(adapter)) {
			pci_disable_sriov(pdev);
			msleep(500);
		} else {
			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
		}

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");
	}
#endif

	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev,
				     pci_select_bars(pdev, IORESOURCE_MEM));

	kfree(adapter->shadow_vfta);
	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}
/**
 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
 * @adapter: board private structure to initialize
 *
 * This function initializes the vf specific data storage and then attempts to
 * allocate the VFs.  The reason for ordering it this way is because it is much
 * more expensive time wise to disable SR-IOV than it is to allocate and free
 * the memory for the VFs.
 **/
static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
{
#ifdef CONFIG_PCI_IOV
	struct pci_dev *pdev = adapter->pdev;
	int old_vfs = igb_find_enabled_vfs(adapter);
	int i;

	if (old_vfs) {
		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
			 "max_vfs setting of %d\n", old_vfs, max_vfs);
		adapter->vfs_allocated_count = old_vfs;
	}

	if (!adapter->vfs_allocated_count)
		return;

	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
				sizeof(struct vf_data_storage), GFP_KERNEL);

	/* if allocation failed then we do not support SR-IOV */
	if (!adapter->vf_data) {
		adapter->vfs_allocated_count = 0;
		dev_err(&pdev->dev, "Unable to allocate memory for VF "
			"Data Storage\n");
		goto out;
	}

	if (!old_vfs) {
		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
			goto err_out;
	}
	dev_info(&pdev->dev, "%d VFs allocated\n",
		 adapter->vfs_allocated_count);
	for (i = 0; i < adapter->vfs_allocated_count; i++)
		igb_vf_configure(adapter, i);

	/* DMA Coalescing is not supported in IOV mode. */
	adapter->flags &= ~IGB_FLAG_DMAC;
	goto out;
err_out:
	kfree(adapter->vf_data);
	adapter->vf_data = NULL;
	adapter->vfs_allocated_count = 0;
out:
	return;
#endif /* CONFIG_PCI_IOV */
}
/**
 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
 * @adapter: board private structure to initialize
 *
 * igb_init_hw_timer initializes the function pointer and values for the hw
 * timer found in hardware.
 **/
static void igb_init_hw_timer(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	switch (hw->mac.type) {
	case e1000_i350:
	case e1000_82580:
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * The 82580 timesync updates the system timer every 8ns by 8ns
		 * and the value cannot be shifted.  Instead we need to shift
		 * the registers to generate a 64bit timer value.  As a result
		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
		 * 24 in order to generate a larger value for synchronization.
		 */
		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
		/* disable system timer temporarily by setting bit 31 */
		wr32(E1000_TSAUXC, 0x80000000);
		wrfl();

		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIMR, 0x00000000);
		wr32(E1000_SYSTIML, 0x80000000);
		wr32(E1000_SYSTIMH, 0x000000FF);
		wrfl();

		/* enable system timer by clearing bit 31 */
		wr32(E1000_TSAUXC, 0x0);
		wrfl();

		timecounter_init(&adapter->clock,
				 &adapter->cycles,
				 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82576:
		/*
		 * Initialize hardware timer: we keep it running just in case
		 * that some program needs it later on.
		 */
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * Scale the NIC clock cycle by a large factor so that
		 * relatively small clock corrections can be added or
		 * subtracted at each clock tick. The drawbacks of a large
		 * factor are a) that the clock register overflows more quickly
		 * (not such a big deal) and b) that the increment per tick has
		 * to fit into 24 bits.  As a result we need to use a shift of
		 * 19 so we can fit a value of 16 into the TIMINCA register.
		 */
		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
		wr32(E1000_TIMINCA,
		     (1 << E1000_TIMINCA_16NS_SHIFT) |
		     (16 << IGB_82576_TSYNC_SHIFT));

		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIML, 0x00000000);
		wr32(E1000_SYSTIMH, 0xFF800000);
		wrfl();

		timecounter_init(&adapter->clock,
				 &adapter->cycles,
				 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82575:
		/* 82575 does not support timesync */
	default:
		break;
	}
}
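/*
 * Worked example for the 82576 path above (annotation, not original code):
 * TIMINCA is written as (1 << E1000_TIMINCA_16NS_SHIFT) | (16 << 19), i.e.
 * one increment every 16 ns with an increment value of 16 << 19.  With
 * cycles.mult = 1 and cycles.shift = 19, the timecounter computes
 * ns = SYSTIM >> 19, so the clock advances 16 ns per hardware tick while
 * keeping 19 fractional bits available for fine-grained rate corrections.
 */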
/**
 * igb_sw_init - Initialize general software structures (struct igb_adapter)
 * @adapter: board private structure to initialize
 *
 * igb_sw_init initializes the Adapter private data structure.
 * Fields are initialized based on PCI device information and
 * OS network device settings (MTU size).
 **/
static int __devinit igb_sw_init(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;

	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

	/* set default ring sizes */
	adapter->tx_ring_count = IGB_DEFAULT_TXD;
	adapter->rx_ring_count = IGB_DEFAULT_RXD;

	/* set default ITR values */
	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
	adapter->tx_itr_setting = IGB_DEFAULT_ITR;

	/* set default work limits */
	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;

	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
				  VLAN_HLEN;
	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;

	spin_lock_init(&adapter->stats64_lock);
#ifdef CONFIG_PCI_IOV
	switch (hw->mac.type) {
	case e1000_82576:
	case e1000_i350:
		if (max_vfs > 7) {
			dev_warn(&pdev->dev,
				 "Maximum of 7 VFs per PF, using max\n");
			adapter->vfs_allocated_count = 7;
		} else
			adapter->vfs_allocated_count = max_vfs;
		break;
	default:
		break;
	}
#endif /* CONFIG_PCI_IOV */
	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
	/* i350 cannot do RSS and SR-IOV at the same time */
	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
		adapter->rss_queues = 1;

	/*
	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
	 * then we should combine the queues into a queue pair in order to
	 * conserve interrupts due to limited supply
	 */
	if ((adapter->rss_queues > 4) ||
	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;

	/* Setup and initialize a copy of the hw vlan table array */
	adapter->shadow_vfta = kzalloc(sizeof(u32) *
				E1000_VLAN_FILTER_TBL_SIZE,
				GFP_ATOMIC);

	/* This call may decrease the number of queues */
	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_probe_vfs(adapter);

	/* Explicitly disable IRQ since the NIC can be in any state. */
	igb_irq_disable(adapter);

	if (hw->mac.type == e1000_i350)
		adapter->flags &= ~IGB_FLAG_DMAC;

	set_bit(__IGB_DOWN, &adapter->state);
	return 0;
}
/**
 * igb_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 **/
static int __igb_open(struct net_device *netdev, bool resuming)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	int err;
	int i;

	/* disallow open during test */
	if (test_bit(__IGB_TESTING, &adapter->state)) {
		WARN_ON(resuming);
		return -EBUSY;
	}

	if (!resuming)
		pm_runtime_get_sync(&pdev->dev);

	netif_carrier_off(netdev);

	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

	igb_power_up_link(adapter);

	/* before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
	 * clean_rx handler before we do so.  */
	igb_configure(adapter);

	err = igb_request_irq(adapter);
	if (err)
		goto err_req_irq;

	/* From here on the code is the same as igb_up() */
	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);

	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(netdev);

	if (!resuming)
		pm_runtime_put(&pdev->dev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;

err_req_irq:
	igb_release_hw_control(adapter);
	igb_power_down_link(adapter);
	igb_free_all_rx_resources(adapter);
err_setup_rx:
	igb_free_all_tx_resources(adapter);
err_setup_tx:
	igb_reset(adapter);
	if (!resuming)
		pm_runtime_put(&pdev->dev);

	return err;
}

static int igb_open(struct net_device *netdev)
{
	return __igb_open(netdev, false);
}
/**
 * igb_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.  The hardware is still under the driver's control, but
 * needs to be disabled.  A global MAC reset is issued to stop the
 * hardware, and all transmit and receive resources are freed.
 **/
static int __igb_close(struct net_device *netdev, bool suspending)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;

	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));

	if (!suspending)
		pm_runtime_get_sync(&pdev->dev);

	igb_down(adapter);
	igb_free_irq(adapter);

	igb_free_all_tx_resources(adapter);
	igb_free_all_rx_resources(adapter);

	if (!suspending)
		pm_runtime_put_sync(&pdev->dev);
	return 0;
}

static int igb_close(struct net_device *netdev)
{
	return __igb_close(netdev, false);
}
/**
 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
 * @tx_ring: tx descriptor ring (for a specific queue) to setup
 *
 * Return 0 on success, negative on failure
 **/
int igb_setup_tx_resources(struct igb_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int orig_node = dev_to_node(dev);
	int size;

	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
	if (!tx_ring->tx_buffer_info)
		tx_ring->tx_buffer_info = vzalloc(size);
	if (!tx_ring->tx_buffer_info)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
	tx_ring->size = ALIGN(tx_ring->size, 4096);

	set_dev_node(dev, tx_ring->numa_node);
	tx_ring->desc = dma_alloc_coherent(dev,
					   tx_ring->size,
					   &tx_ring->dma,
					   GFP_KERNEL);
	set_dev_node(dev, orig_node);
	if (!tx_ring->desc)
		tx_ring->desc = dma_alloc_coherent(dev,
						   tx_ring->size,
						   &tx_ring->dma,
						   GFP_KERNEL);

	if (!tx_ring->desc)
		goto err;

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	return 0;

err:
	vfree(tx_ring->tx_buffer_info);
	dev_err(dev,
		"Unable to allocate memory for the transmit descriptor ring\n");
	return -ENOMEM;
}

/**
 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
 *				  (Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int i, err = 0;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		err = igb_setup_tx_resources(adapter->tx_ring[i]);
		if (err) {
			dev_err(&pdev->dev,
				"Allocation for Tx Queue %u failed\n", i);
			for (i--; i >= 0; i--)
				igb_free_tx_resources(adapter->tx_ring[i]);
			break;
		}
	}

	return err;
}
/**
 * igb_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 **/
void igb_setup_tctl(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl;

	/* disable queue 0 which is enabled by default on 82575 and 82576 */
	wr32(E1000_TXDCTL(0), 0);

	/* Program the Transmit Control Register */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);

	igb_config_collision_dist(hw);

	/* Enable transmits */
	tctl |= E1000_TCTL_EN;

	wr32(E1000_TCTL, tctl);
}
/**
 * igb_configure_tx_ring - Configure transmit ring after Reset
 * @adapter: board private structure
 * @ring: tx ring to configure
 *
 * Configure a transmit ring after a reset.
 **/
void igb_configure_tx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 txdctl = 0;
	u64 tdba = ring->dma;
	int reg_idx = ring->reg_idx;

	/* disable the queue */
	wr32(E1000_TXDCTL(reg_idx), 0);
	wrfl();
	mdelay(10);

	wr32(E1000_TDLEN(reg_idx),
	     ring->count * sizeof(union e1000_adv_tx_desc));
	wr32(E1000_TDBAL(reg_idx),
	     tdba & 0x00000000ffffffffULL);
	wr32(E1000_TDBAH(reg_idx), tdba >> 32);

	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
	wr32(E1000_TDH(reg_idx), 0);
	writel(0, ring->tail);

	txdctl |= IGB_TX_PTHRESH;
	txdctl |= IGB_TX_HTHRESH << 8;
	txdctl |= IGB_TX_WTHRESH << 16;

	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
	wr32(E1000_TXDCTL(reg_idx), txdctl);

	netdev_tx_reset_queue(txring_txq(ring));
}

/**
 * igb_configure_tx - Configure transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 **/
static void igb_configure_tx(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
}
/**
 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
 * @rx_ring: rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int igb_setup_rx_resources(struct igb_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int orig_node = dev_to_node(dev);
	int size, desc_len;

	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
	if (!rx_ring->rx_buffer_info)
		rx_ring->rx_buffer_info = vzalloc(size);
	if (!rx_ring->rx_buffer_info)
		goto err;

	desc_len = sizeof(union e1000_adv_rx_desc);

	/* Round up to nearest 4K */
	rx_ring->size = rx_ring->count * desc_len;
	rx_ring->size = ALIGN(rx_ring->size, 4096);

	set_dev_node(dev, rx_ring->numa_node);
	rx_ring->desc = dma_alloc_coherent(dev,
					   rx_ring->size,
					   &rx_ring->dma,
					   GFP_KERNEL);
	set_dev_node(dev, orig_node);
	if (!rx_ring->desc)
		rx_ring->desc = dma_alloc_coherent(dev,
						   rx_ring->size,
						   &rx_ring->dma,
						   GFP_KERNEL);

	if (!rx_ring->desc)
		goto err;

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;

err:
	vfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;
	dev_err(dev, "Unable to allocate memory for the receive descriptor"
		" ring\n");
	return -ENOMEM;
}

/**
 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
 *				  (Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int i, err = 0;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		err = igb_setup_rx_resources(adapter->rx_ring[i]);
		if (err) {
			dev_err(&pdev->dev,
				"Allocation for Rx Queue %u failed\n", i);
			for (i--; i >= 0; i--)
				igb_free_rx_resources(adapter->rx_ring[i]);
			break;
		}
	}

	return err;
}
/**
 * igb_setup_mrqc - configure the multiple receive queue control registers
 * @adapter: Board private structure
 **/
static void igb_setup_mrqc(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mrqc, rxcsum;
	u32 j, num_rx_queues, shift = 0, shift2 = 0;
	union e1000_reta {
		u32 dword;
		u8  bytes[4];
	} reta;
	static const u8 rsshash[40] = {
		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
		0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };

	/* Fill out hash function seeds */
	for (j = 0; j < 10; j++) {
		u32 rsskey = rsshash[(j * 4)];
		rsskey |= rsshash[(j * 4) + 1] << 8;
		rsskey |= rsshash[(j * 4) + 2] << 16;
		rsskey |= rsshash[(j * 4) + 3] << 24;
		array_wr32(E1000_RSSRK(0), j, rsskey);
	}

	num_rx_queues = adapter->rss_queues;

	if (adapter->vfs_allocated_count) {
		/* 82575 and 82576 supports 2 RSS queues for VMDq */
		switch (hw->mac.type) {
		case e1000_i350:
		case e1000_82580:
			num_rx_queues = 1;
			shift = 0;
			break;
		case e1000_82576:
			shift = 3;
			num_rx_queues = 2;
			break;
		case e1000_82575:
			shift = 2;
			shift2 = 6;
		default:
			break;
		}
	} else {
		if (hw->mac.type == e1000_82575)
			shift = 6;
	}

	for (j = 0; j < (32 * 4); j++) {
		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
		if (shift2)
			reta.bytes[j & 3] |= num_rx_queues << shift2;
		if ((j & 3) == 3)
			wr32(E1000_RETA(j >> 2), reta.dword);
	}

	/*
	 * Disable raw packet checksumming so that RSS hash is placed in
	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = rd32(E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;

	if (adapter->hw.mac.type >= e1000_82576)
		/* Enable Receive Checksum Offload for SCTP */
		rxcsum |= E1000_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	wr32(E1000_RXCSUM, rxcsum);

	/* If VMDq is enabled then we set the appropriate mode for that, else
	 * we default to RSS so that an RSS hash is calculated per packet even
	 * if we are only using one queue */
	if (adapter->vfs_allocated_count) {
		if (hw->mac.type > e1000_82575) {
			/* Set the default pool for the PF's first queue */
			u32 vtctl = rd32(E1000_VT_CTL);
			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
				   E1000_VT_CTL_DISABLE_DEF_POOL);
			vtctl |= adapter->vfs_allocated_count <<
				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
			wr32(E1000_VT_CTL, vtctl);
		}
		if (adapter->rss_queues > 1)
			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
		else
			mrqc = E1000_MRQC_ENABLE_VMDQ;
	} else {
		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
	}
	igb_vmm_control(adapter);

	/*
	 * Generate RSS hash based on TCP port numbers and/or
	 * IPv4/v6 src and dst addresses since UDP cannot be
	 * hashed reliably due to IP fragmentation
	 */
	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
		E1000_MRQC_RSS_FIELD_IPV4_TCP |
		E1000_MRQC_RSS_FIELD_IPV6 |
		E1000_MRQC_RSS_FIELD_IPV6_TCP |
		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;

	wr32(E1000_MRQC, mrqc);
}
/**
 * igb_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 **/
void igb_setup_rctl(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = rd32(E1000_RCTL);

	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);

	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/*
	 * enable stripping of CRC. It's unlikely this will break BMC
	 * redirection as it did with e1000. Newer features require
	 * that the HW strips the CRC.
	 */
	rctl |= E1000_RCTL_SECRC;

	/* disable store bad packets and clear size bits. */
	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);

	/* enable LPE to prevent packets larger than max_frame_size */
	rctl |= E1000_RCTL_LPE;

	/* disable queue 0 to prevent tail write w/o re-config */
	wr32(E1000_RXDCTL(0), 0);

	/* Attention!!!  For SR-IOV PF driver operations you must enable
	 * queue drop for all VF and PF queues to prevent head of line blocking
	 * if an un-trusted VF does not provide descriptors to hardware.
	 */
	if (adapter->vfs_allocated_count) {
		/* set all queue drop enable bits */
		wr32(E1000_QDE, ALL_QUEUES);
	}

	wr32(E1000_RCTL, rctl);
}
static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
                                   int vfn)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/* if it isn't the PF check to see if VFs are enabled and
	 * increase the size to support vlan tags */
	if (vfn < adapter->vfs_allocated_count &&
	    adapter->vf_data[vfn].vlans_enabled)
		size += VLAN_TAG_SIZE;

	vmolr = rd32(E1000_VMOLR(vfn));
	vmolr &= ~E1000_VMOLR_RLPML_MASK;
	vmolr |= size | E1000_VMOLR_LPE;
	wr32(E1000_VMOLR(vfn), vmolr);

	return 0;
}
/**
 * igb_rlpml_set - set maximum receive packet size
 * @adapter: board private structure
 *
 * Configure maximum receivable packet size.
 **/
static void igb_rlpml_set(struct igb_adapter *adapter)
{
	u32 max_frame_size = adapter->max_frame_size;
	struct e1000_hw *hw = &adapter->hw;
	u16 pf_id = adapter->vfs_allocated_count;

	if (pf_id) {
		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
		/*
		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
		 * to our max jumbo frame size, in case we need to enable
		 * jumbo frames on one of the rings later.
		 * This will not pass over-length frames into the default
		 * queue because it's gated by the VMOLR.RLPML.
		 */
		max_frame_size = MAX_JUMBO_FRAME_SIZE;
	}

	wr32(E1000_RLPML, max_frame_size);
}
static inline void igb_set_vmolr(struct igb_adapter *adapter,
				 int vfn, bool aupe)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/*
	 * This register exists only on 82576 and newer so if we are older then
	 * we should exit and do nothing
	 */
	if (hw->mac.type < e1000_82576)
		return;

	vmolr = rd32(E1000_VMOLR(vfn));
	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
	if (aupe)
		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
	else
		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */

	/* clear all bits that might not be set */
	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);

	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
	/*
	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
	 * multicast packets
	 */
	if (vfn <= adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */

	wr32(E1000_VMOLR(vfn), vmolr);
}
/**
 * igb_configure_rx_ring - Configure a receive ring after Reset
 * @adapter: board private structure
 * @ring: receive ring to be configured
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
void igb_configure_rx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	u64 rdba = ring->dma;
	int reg_idx = ring->reg_idx;
	u32 srrctl = 0, rxdctl = 0;

	/* disable the queue */
	wr32(E1000_RXDCTL(reg_idx), 0);

	/* Set DMA base address registers */
	wr32(E1000_RDBAL(reg_idx),
	     rdba & 0x00000000ffffffffULL);
	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
	wr32(E1000_RDLEN(reg_idx),
	     ring->count * sizeof(union e1000_adv_rx_desc));

	/* initialize head and tail */
	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
	wr32(E1000_RDH(reg_idx), 0);
	writel(0, ring->tail);

	/* set descriptor configuration */
	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
#else
	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
#endif
	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
	if (hw->mac.type >= e1000_82580)
		srrctl |= E1000_SRRCTL_TIMESTAMP;
	/* Only set Drop Enable if we are supporting multiple queues */
	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
		srrctl |= E1000_SRRCTL_DROP_EN;

	wr32(E1000_SRRCTL(reg_idx), srrctl);

	/* set filtering for VMDQ pools */
	igb_set_vmolr(adapter, reg_idx & 0x7, true);

	rxdctl |= IGB_RX_PTHRESH;
	rxdctl |= IGB_RX_HTHRESH << 8;
	rxdctl |= IGB_RX_WTHRESH << 16;

	/* enable receive descriptor fetching */
	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
	wr32(E1000_RXDCTL(reg_idx), rxdctl);
}
/**
 * igb_configure_rx - Configure receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
static void igb_configure_rx(struct igb_adapter *adapter)
{
	int i;

	/* set UTA to appropriate mode */
	igb_set_uta(adapter);

	/* set the correct pool for the PF default MAC address in entry 0 */
	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
	                 adapter->vfs_allocated_count);

	/* Setup the HW Rx Head and Tail Descriptor Pointers and
	 * the Base and Length of the Rx Descriptor Ring */
	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
}
/**
 * igb_free_tx_resources - Free Tx Resources per Queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void igb_free_tx_resources(struct igb_ring *tx_ring)
{
	igb_clean_tx_ring(tx_ring);

	vfree(tx_ring->tx_buffer_info);
	tx_ring->tx_buffer_info = NULL;

	/* if not set, then don't free */
	if (!tx_ring->desc)
		return;

	dma_free_coherent(tx_ring->dev, tx_ring->size,
			  tx_ring->desc, tx_ring->dma);

	tx_ring->desc = NULL;
}

/**
 * igb_free_all_tx_resources - Free Tx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all transmit software resources
 **/
static void igb_free_all_tx_resources(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_free_tx_resources(adapter->tx_ring[i]);
}
void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
				    struct igb_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		dev_kfree_skb_any(tx_buffer->skb);
		if (tx_buffer->dma)
			dma_unmap_single(ring->dev,
					 tx_buffer->dma,
					 tx_buffer->length,
					 DMA_TO_DEVICE);
	} else if (tx_buffer->dma) {
		dma_unmap_page(ring->dev,
			       tx_buffer->dma,
			       tx_buffer->length,
			       DMA_TO_DEVICE);
	}
	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	tx_buffer->dma = 0;
	/* buffer_info must be completely set up in the transmit path */
}
/**
 * igb_clean_tx_ring - Free Tx Buffers
 * @tx_ring: ring to be cleaned
 **/
static void igb_clean_tx_ring(struct igb_ring *tx_ring)
{
	struct igb_tx_buffer *buffer_info;
	unsigned long size;
	u16 i;

	if (!tx_ring->tx_buffer_info)
		return;
	/* Free all the Tx ring sk_buffs */

	for (i = 0; i < tx_ring->count; i++) {
		buffer_info = &tx_ring->tx_buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
	}

	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_buffer_info, 0, size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
}

/**
 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_clean_tx_ring(adapter->tx_ring[i]);
}
/**
 * igb_free_rx_resources - Free Rx Resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void igb_free_rx_resources(struct igb_ring *rx_ring)
{
	igb_clean_rx_ring(rx_ring);

	vfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;

	/* if not set, then don't free */
	if (!rx_ring->desc)
		return;

	dma_free_coherent(rx_ring->dev, rx_ring->size,
			  rx_ring->desc, rx_ring->dma);

	rx_ring->desc = NULL;
}

/**
 * igb_free_all_rx_resources - Free Rx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all receive software resources
 **/
static void igb_free_all_rx_resources(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_free_rx_resources(adapter->rx_ring[i]);
}
/**
 * igb_clean_rx_ring - Free Rx Buffers per Queue
 * @rx_ring: ring to free buffers from
 **/
static void igb_clean_rx_ring(struct igb_ring *rx_ring)
{
	unsigned long size;
	u16 i;

	if (!rx_ring->rx_buffer_info)
		return;

	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
		if (buffer_info->dma) {
			dma_unmap_single(rx_ring->dev,
			                 buffer_info->dma,
					 IGB_RX_HDR_LEN,
					 DMA_FROM_DEVICE);
			buffer_info->dma = 0;
		}

		if (buffer_info->skb) {
			dev_kfree_skb(buffer_info->skb);
			buffer_info->skb = NULL;
		}
		if (buffer_info->page_dma) {
			dma_unmap_page(rx_ring->dev,
			               buffer_info->page_dma,
				       PAGE_SIZE / 2,
				       DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;
		}
		if (buffer_info->page) {
			put_page(buffer_info->page);
			buffer_info->page = NULL;
			buffer_info->page_offset = 0;
		}
	}

	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_buffer_info, 0, size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}

/**
 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_clean_rx_ring(adapter->rx_ring[i]);
}
/**
 * igb_set_mac - Change the Ethernet Address of the NIC
 * @netdev: network interface device structure
 * @p: pointer to an address structure
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_set_mac(struct net_device *netdev, void *p)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct sockaddr *addr = p;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);

	/* set the correct pool for the new PF MAC address in entry 0 */
	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
	                 adapter->vfs_allocated_count);

	return 0;
}
/**
 * igb_write_mc_addr_list - write multicast addresses to MTA
 * @netdev: network interface device structure
 *
 * Writes multicast address list to the MTA hash table.
 * Returns: -ENOMEM on failure
 *          0 on no addresses written
 *          X on writing X addresses to MTA
 **/
static int igb_write_mc_addr_list(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct netdev_hw_addr *ha;
	u8  *mta_list;
	int i;

	if (netdev_mc_empty(netdev)) {
		/* nothing to program, so clear mc list */
		igb_update_mc_addr_list(hw, NULL, 0);
		igb_restore_vf_multicasts(adapter);
		return 0;
	}

	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
	if (!mta_list)
		return -ENOMEM;

	/* The shared function expects a packed array of only addresses. */
	i = 0;
	netdev_for_each_mc_addr(ha, netdev)
		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

	igb_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return netdev_mc_count(netdev);
}
/**
 * igb_write_uc_addr_list - write unicast addresses to RAR table
 * @netdev: network interface device structure
 *
 * Writes unicast address list to the RAR table.
 * Returns: -ENOMEM on failure/insufficient address space
 *          0 on no addresses written
 *          X on writing X addresses to the RAR table
 **/
static int igb_write_uc_addr_list(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
	int count = 0;

	/* return ENOMEM indicating insufficient memory for addresses */
	if (netdev_uc_count(netdev) > rar_entries)
		return -ENOMEM;

	if (!netdev_uc_empty(netdev) && rar_entries) {
		struct netdev_hw_addr *ha;

		netdev_for_each_uc_addr(ha, netdev) {
			if (!rar_entries)
				break;
			igb_rar_set_qsel(adapter, ha->addr,
			                 rar_entries--,
			                 vfn);
			count++;
		}
	}
	/* write the addresses in reverse order to avoid write combining */
	for (; rar_entries > 0 ; rar_entries--) {
		wr32(E1000_RAH(rar_entries), 0);
		wr32(E1000_RAL(rar_entries), 0);
	}
	wrfl();

	return count;
}
/**
 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
 * @netdev: network interface device structure
 *
 * The set_rx_mode entry point is called whenever the unicast or multicast
 * address lists or the network interface flags are updated.  This routine is
 * responsible for configuring the hardware for proper unicast, multicast,
 * promiscuous mode, and all-multi behavior.
 **/
static void igb_set_rx_mode(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	u32 rctl, vmolr = 0;
	int count;

	/* Check for Promiscuous and All Multicast modes */
	rctl = rd32(E1000_RCTL);

	/* clear the affected bits */
	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);

	if (netdev->flags & IFF_PROMISC) {
		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
	} else {
		if (netdev->flags & IFF_ALLMULTI) {
			rctl |= E1000_RCTL_MPE;
			vmolr |= E1000_VMOLR_MPME;
		} else {
			/*
			 * Write addresses to the MTA, if the attempt fails
			 * then we should just turn on promiscuous mode so
			 * that we can at least receive multicast traffic
			 */
			count = igb_write_mc_addr_list(netdev);
			if (count < 0) {
				rctl |= E1000_RCTL_MPE;
				vmolr |= E1000_VMOLR_MPME;
			} else if (count) {
				vmolr |= E1000_VMOLR_ROMPE;
			}
		}
		/*
		 * Write addresses to available RAR registers, if there is not
		 * sufficient space to store all the addresses then enable
		 * unicast promiscuous mode
		 */
		count = igb_write_uc_addr_list(netdev);
		if (count < 0) {
			rctl |= E1000_RCTL_UPE;
			vmolr |= E1000_VMOLR_ROPE;
		}
		rctl |= E1000_RCTL_VFE;
	}
	wr32(E1000_RCTL, rctl);

	/*
	 * In order to support SR-IOV and eventually VMDq it is necessary to set
	 * the VMOLR to enable the appropriate modes.  Without this workaround
	 * we will have issues with VLAN tag stripping not being done for frames
	 * that are only arriving because we are the default pool
	 */
	if (hw->mac.type < e1000_82576)
		return;

	vmolr |= rd32(E1000_VMOLR(vfn)) &
		 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
	wr32(E1000_VMOLR(vfn), vmolr);
	igb_restore_vf_multicasts(adapter);
}
static void igb_check_wvbr(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 wvbr = 0;

	switch (hw->mac.type) {
	case e1000_82576:
	case e1000_i350:
		if (!(wvbr = rd32(E1000_WVBR)))
			return;
		break;
	default:
		break;
	}

	adapter->wvbr |= wvbr;
}

#define IGB_STAGGERED_QUEUE_OFFSET 8

static void igb_spoof_check(struct igb_adapter *adapter)
{
	int j;

	if (!adapter->wvbr)
		return;

	for(j = 0; j < adapter->vfs_allocated_count; j++) {
		if (adapter->wvbr & (1 << j) ||
		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
			dev_warn(&adapter->pdev->dev,
				"Spoof event(s) detected on VF %d\n", j);
			adapter->wvbr &=
				~((1 << j) |
				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
		}
	}
}
/* Need to wait a few seconds after link up to get diagnostic information from
 * the phy */
static void igb_update_phy_info(unsigned long data)
{
	struct igb_adapter *adapter = (struct igb_adapter *) data;
	igb_get_phy_info(&adapter->hw);
}
/**
 * igb_has_link - check shared code for link and determine up/down
 * @adapter: pointer to driver private info
 **/
bool igb_has_link(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	bool link_active = false;
	s32 ret_val = 0;

	/* get_link_status is set on LSC (link status) interrupt or
	 * rx sequence error interrupt.  get_link_status will stay
	 * false until the e1000_check_for_link establishes link
	 * for copper adapters ONLY
	 */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			ret_val = hw->mac.ops.check_for_link(hw);
			link_active = !hw->mac.get_link_status;
		} else {
			link_active = true;
		}
		break;
	case e1000_media_type_internal_serdes:
		ret_val = hw->mac.ops.check_for_link(hw);
		link_active = hw->mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	return link_active;
}
static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
{
	bool ret = false;
	u32 ctrl_ext, thstat;

	/* check for thermal sensor event on i350, copper only */
	if (hw->mac.type == e1000_i350) {
		thstat = rd32(E1000_THSTAT);
		ctrl_ext = rd32(E1000_CTRL_EXT);

		if ((hw->phy.media_type == e1000_media_type_copper) &&
		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
			ret = !!(thstat & event);
		}
	}

	return ret;
}
/**
 * igb_watchdog - Timer Call-back
 * @data: pointer to adapter cast into an unsigned long
 **/
static void igb_watchdog(unsigned long data)
{
	struct igb_adapter *adapter = (struct igb_adapter *)data;
	/* Do the rest outside of interrupt context */
	schedule_work(&adapter->watchdog_task);
}

static void igb_watchdog_task(struct work_struct *work)
{
	struct igb_adapter *adapter = container_of(work,
	                                           struct igb_adapter,
	                                           watchdog_task);
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	u32 link;
	int i;

	link = igb_has_link(adapter);

	/* Cancel scheduled suspend requests. */
	pm_runtime_resume(netdev->dev.parent);

	if (link) {
		if (!netif_carrier_ok(netdev)) {
			u32 ctrl;
			hw->mac.ops.get_speed_and_duplex(hw,
			                                 &adapter->link_speed,
			                                 &adapter->link_duplex);

			ctrl = rd32(E1000_CTRL);
			/* Links status message must follow this format */
			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
			       "Duplex, Flow Control: %s\n",
			       netdev->name,
			       adapter->link_speed,
			       adapter->link_duplex == FULL_DUPLEX ?
			       "Full" : "Half",
			       (ctrl & E1000_CTRL_TFCE) &&
			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");

			/* check for thermal sensor event */
			if (igb_thermal_sensor_event(hw,
			    E1000_THSTAT_LINK_THROTTLE)) {
				netdev_info(netdev, "The network adapter link "
					    "speed was downshifted because it "
					    "overheated\n");
			}

			/* adjust timeout factor according to speed/duplex */
			adapter->tx_timeout_factor = 1;
			switch (adapter->link_speed) {
			case SPEED_10:
				adapter->tx_timeout_factor = 14;
				break;
			case SPEED_100:
				/* maybe add some timeout factor ? */
				break;
			}

			netif_carrier_on(netdev);

			igb_ping_all_vfs(adapter);
			igb_check_vf_rate_limit(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));
		}
	} else {
		if (netif_carrier_ok(netdev)) {
			adapter->link_speed = 0;
			adapter->link_duplex = 0;

			/* check for thermal sensor event */
			if (igb_thermal_sensor_event(hw,
			    E1000_THSTAT_PWR_DOWN)) {
				netdev_err(netdev, "The network adapter was "
					   "stopped because it overheated\n");
			}

			/* Links status message must follow this format */
			printk(KERN_INFO "igb: %s NIC Link is Down\n",
			       netdev->name);
			netif_carrier_off(netdev);

			igb_ping_all_vfs(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));

			pm_schedule_suspend(netdev->dev.parent,
					    MSEC_PER_SEC * 5);
		}
	}

	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *tx_ring = adapter->tx_ring[i];
		if (!netif_carrier_ok(netdev)) {
			/* We've lost link, so the controller stops DMA,
			 * but we've got queued Tx work that's never going
			 * to get done, so reset controller to flush Tx.
			 * (Do the reset outside of interrupt context). */
			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
				adapter->tx_timeout_count++;
				schedule_work(&adapter->reset_task);
				/* return immediately since reset is imminent */
				return;
			}
		}

		/* Force detection of hung controller every watchdog period */
		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
	}

	/* Cause software interrupt to ensure rx ring is cleaned */
	if (adapter->msix_entries) {
		u32 eics = 0;
		for (i = 0; i < adapter->num_q_vectors; i++)
			eics |= adapter->q_vector[i]->eims_value;
		wr32(E1000_EICS, eics);
	} else {
		wr32(E1000_ICS, E1000_ICS_RXDMT0);
	}

	igb_spoof_check(adapter);

	/* Reset the timer */
	if (!test_bit(__IGB_DOWN, &adapter->state))
		mod_timer(&adapter->watchdog_timer,
			  round_jiffies(jiffies + 2 * HZ));
}
enum latency_range {
	lowest_latency = 0,
	low_latency = 1,
	bulk_latency = 2,
	latency_invalid = 255
};

/**
 * igb_update_ring_itr - update the dynamic ITR value based on packet size
 *
 *      Stores a new ITR value based strictly on packet size.  This
 *      algorithm is less sophisticated than that used in igb_update_itr,
 *      due to the difficulty of synchronizing statistics across multiple
 *      receive rings.  The divisors and thresholds used by this function
 *      were determined based on theoretical maximum wire speed and testing
 *      data, in order to minimize response time while increasing bulk
 *      throughput.
 *      This functionality is controlled by the InterruptThrottleRate module
 *      parameter (see igb_param.c)
 *      NOTE:  This function is called only when operating in a multiqueue
 *             receive environment.
 * @q_vector: pointer to q_vector
 **/
static void igb_update_ring_itr(struct igb_q_vector *q_vector)
{
	int new_val = q_vector->itr_val;
	int avg_wire_size = 0;
	struct igb_adapter *adapter = q_vector->adapter;
	unsigned int packets;

	/* For non-gigabit speeds, just fix the interrupt rate at 4000
	 * ints/sec - ITR timer value of 120 ticks.
	 */
	if (adapter->link_speed != SPEED_1000) {
		new_val = IGB_4K_ITR;
		goto set_itr_val;
	}

	packets = q_vector->rx.total_packets;
	if (packets)
		avg_wire_size = q_vector->rx.total_bytes / packets;

	packets = q_vector->tx.total_packets;
	if (packets)
		avg_wire_size = max_t(u32, avg_wire_size,
		                      q_vector->tx.total_bytes / packets);

	/* if avg_wire_size isn't set no work was done */
	if (!avg_wire_size)
		goto clear_counts;

	/* Add 24 bytes to size to account for CRC, preamble, and gap */
	avg_wire_size += 24;

	/* Don't starve jumbo frames */
	avg_wire_size = min(avg_wire_size, 3000);

	/* Give a little boost to mid-size frames */
	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
		new_val = avg_wire_size / 3;
	else
		new_val = avg_wire_size / 2;

	/* conservative mode (itr 3) eliminates the lowest_latency setting */
	if (new_val < IGB_20K_ITR &&
	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
		new_val = IGB_20K_ITR;

set_itr_val:
	if (new_val != q_vector->itr_val) {
		q_vector->itr_val = new_val;
		q_vector->set_itr = 1;
	}
clear_counts:
	q_vector->rx.total_bytes = 0;
	q_vector->rx.total_packets = 0;
	q_vector->tx.total_bytes = 0;
	q_vector->tx.total_packets = 0;
}
/**
 * igb_update_itr - update the dynamic ITR value based on statistics
 *      Stores a new ITR value based on packets and byte
 *      counts during the last interrupt.  The advantage of per interrupt
 *      computation is faster updates and more accurate ITR for the current
 *      traffic pattern.  Constants in this function were computed
 *      based on theoretical maximum wire speed and thresholds were set based
 *      on testing data as well as attempting to minimize response time
 *      while increasing bulk throughput.
 *      this functionality is controlled by the InterruptThrottleRate module
 *      parameter (see igb_param.c)
 *      NOTE:  These calculations are only valid when operating in a single-
 *             queue environment.
 * @q_vector: pointer to q_vector
 * @ring_container: ring info to update the itr for
 **/
static void igb_update_itr(struct igb_q_vector *q_vector,
			   struct igb_ring_container *ring_container)
{
	unsigned int packets = ring_container->total_packets;
	unsigned int bytes = ring_container->total_bytes;
	u8 itrval = ring_container->itr;

	/* no packets, exit with status unchanged */
	if (packets == 0)
		return;

	switch (itrval) {
	case lowest_latency:
		/* handle TSO and jumbo frames */
		if (bytes/packets > 8000)
			itrval = bulk_latency;
		else if ((packets < 5) && (bytes > 512))
			itrval = low_latency;
		break;
	case low_latency:  /* 50 usec aka 20000 ints/s */
		if (bytes > 10000) {
			/* this if handles the TSO accounting */
			if (bytes/packets > 8000) {
				itrval = bulk_latency;
			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
				itrval = bulk_latency;
			} else if ((packets > 35)) {
				itrval = lowest_latency;
			}
		} else if (bytes/packets > 2000) {
			itrval = bulk_latency;
		} else if (packets <= 2 && bytes < 512) {
			itrval = lowest_latency;
		}
		break;
	case bulk_latency: /* 250 usec aka 4000 ints/s */
		if (bytes > 25000) {
			if (packets > 35)
				itrval = low_latency;
		} else if (bytes < 1500) {
			itrval = low_latency;
		}
		break;
	}

	/* clear work counters since we have the values we need */
	ring_container->total_bytes = 0;
	ring_container->total_packets = 0;

	/* write updated itr to ring container */
	ring_container->itr = itrval;
}
static void igb_set_itr(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	u32 new_itr = q_vector->itr_val;
	u8 current_itr = 0;

	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
	if (adapter->link_speed != SPEED_1000) {
		current_itr = 0;
		new_itr = IGB_4K_ITR;
		goto set_itr_now;
	}

	igb_update_itr(q_vector, &q_vector->tx);
	igb_update_itr(q_vector, &q_vector->rx);

	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);

	/* conservative mode (itr 3) eliminates the lowest_latency setting */
	if (current_itr == lowest_latency &&
	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
		current_itr = low_latency;

	switch (current_itr) {
	/* counts and packets in update_itr are dependent on these numbers */
	case lowest_latency:
		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
		break;
	case low_latency:
		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
		break;
	case bulk_latency:
		new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
		break;
	default:
		break;
	}

set_itr_now:
	if (new_itr != q_vector->itr_val) {
		/* this attempts to bias the interrupt rate towards Bulk
		 * by adding intermediate steps when interrupt rate is
		 * increasing */
		new_itr = new_itr > q_vector->itr_val ?
		             max((new_itr * q_vector->itr_val) /
		                 (new_itr + (q_vector->itr_val >> 2)),
		                 new_itr) :
		             new_itr;
		/* Don't write the value here; it resets the adapter's
		 * internal timer, and causes us to delay far longer than
		 * we should between interrupts.  Instead, we write the ITR
		 * value at the beginning of the next interrupt so the timing
		 * ends up being correct.
		 */
		q_vector->itr_val = new_itr;
		q_vector->set_itr = 1;
	}
}
static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
			    u32 type_tucmd, u32 mss_l4len_idx)
{
	struct e1000_adv_tx_context_desc *context_desc;
	u16 i = tx_ring->next_to_use;

	context_desc = IGB_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* set bits to identify this as an advanced context descriptor */
	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	/* For 82575, context index must be unique per ring. */
	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
	context_desc->seqnum_seed	= 0;
	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
}
static int igb_tso(struct igb_ring *tx_ring,
		   struct igb_tx_buffer *first,
		   u8 *hdr_len)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens, type_tucmd;
	u32 mss_l4len_idx, l4len;

	if (!skb_is_gso(skb))
		return 0;

	if (skb_header_cloned(skb)) {
		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (err)
			return err;
	}

	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;

	if (first->protocol == __constant_htons(ETH_P_IP)) {
		struct iphdr *iph = ip_hdr(skb);
		iph->tot_len = 0;
		iph->check = 0;
		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
							 iph->daddr, 0,
							 IPPROTO_TCP,
							 0);
		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
		first->tx_flags |= IGB_TX_FLAGS_TSO |
				   IGB_TX_FLAGS_CSUM |
				   IGB_TX_FLAGS_IPV4;
	} else if (skb_is_gso_v6(skb)) {
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						       &ipv6_hdr(skb)->daddr,
						       0, IPPROTO_TCP, 0);
		first->tx_flags |= IGB_TX_FLAGS_TSO |
				   IGB_TX_FLAGS_CSUM;
	}

	/* compute header lengths */
	l4len = tcp_hdrlen(skb);
	*hdr_len = skb_transport_offset(skb) + l4len;

	/* update gso size and bytecount with header size */
	first->gso_segs = skb_shinfo(skb)->gso_segs;
	first->bytecount += (first->gso_segs - 1) * *hdr_len;

	/* MSS L4LEN IDX */
	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;

	/* VLAN MACLEN IPLEN */
	vlan_macip_lens = skb_network_header_len(skb);
	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);

	return 1;
}
static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens = 0;
	u32 mss_l4len_idx = 0;
	u32 type_tucmd = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
			return;
	} else {
		u8 l4_hdr = 0;
		switch (first->protocol) {
		case __constant_htons(ETH_P_IP):
			vlan_macip_lens |= skb_network_header_len(skb);
			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
			l4_hdr = ip_hdr(skb)->protocol;
			break;
		case __constant_htons(ETH_P_IPV6):
			vlan_macip_lens |= skb_network_header_len(skb);
			l4_hdr = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			if (unlikely(net_ratelimit())) {
				dev_warn(tx_ring->dev,
				 "partial checksum but proto=%x!\n",
				 first->protocol);
			}
			break;
		}

		switch (l4_hdr) {
		case IPPROTO_TCP:
			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
			mss_l4len_idx = tcp_hdrlen(skb) <<
					E1000_ADVTXD_L4LEN_SHIFT;
			break;
		case IPPROTO_SCTP:
			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
			mss_l4len_idx = sizeof(struct sctphdr) <<
					E1000_ADVTXD_L4LEN_SHIFT;
			break;
		case IPPROTO_UDP:
			mss_l4len_idx = sizeof(struct udphdr) <<
					E1000_ADVTXD_L4LEN_SHIFT;
			break;
		default:
			if (unlikely(net_ratelimit())) {
				dev_warn(tx_ring->dev,
				 "partial checksum but l4 proto=%x!\n",
				 l4_hdr);
			}
			break;
		}

		/* update TX checksum flag */
		first->tx_flags |= IGB_TX_FLAGS_CSUM;
	}

	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
}
static __le32 igb_tx_cmd_type(u32 tx_flags)
{
	/* set type for advanced descriptor with frame checksum insertion */
	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
				      E1000_ADVTXD_DCMD_IFCS |
				      E1000_ADVTXD_DCMD_DEXT);

	/* set HW vlan bit if vlan is present */
	if (tx_flags & IGB_TX_FLAGS_VLAN)
		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);

	/* set timestamp bit if present */
	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);

	/* set segmentation bits for TSO */
	if (tx_flags & IGB_TX_FLAGS_TSO)
		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);

	return cmd_type;
}

static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
				 union e1000_adv_tx_desc *tx_desc,
				 u32 tx_flags, unsigned int paylen)
{
	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;

	/* 82575 requires a unique index per ring if any offload is enabled */
	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		olinfo_status |= tx_ring->reg_idx << 4;

	/* insert L4 checksum */
	if (tx_flags & IGB_TX_FLAGS_CSUM) {
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

		/* insert IPv4 checksum */
		if (tx_flags & IGB_TX_FLAGS_IPV4)
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
	}

	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
}

/*
 * The largest size we can write to the descriptor is 65535.  In order to
 * maintain a power of two alignment we have to limit ourselves to 32K.
 */
#define IGB_MAX_TXD_PWR		15
#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)

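/*
 * Worked example (added): a 48K linear buffer consumes two data
 * descriptors, one carrying IGB_MAX_DATA_PER_TXD (32K) bytes and one
 * carrying the remaining 16K, which is what the inner loop in
 * igb_tx_map() below produces.  A hypothetical helper (not part of
 * this file) would compute the same count as:
 *
 *	#define IGB_TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD)
 */
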
static void igb_tx_map(struct igb_ring *tx_ring,
		       struct igb_tx_buffer *first,
		       const u8 hdr_len)
{
	struct sk_buff *skb = first->skb;
	struct igb_tx_buffer *tx_buffer_info;
	union e1000_adv_tx_desc *tx_desc;
	dma_addr_t dma;
	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	unsigned int paylen = skb->len - hdr_len;
	__le32 cmd_type;
	u32 tx_flags = first->tx_flags;
	u16 i = tx_ring->next_to_use;

	tx_desc = IGB_TX_DESC(tx_ring, i);

	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
	cmd_type = igb_tx_cmd_type(tx_flags);

	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
	if (dma_mapping_error(tx_ring->dev, dma))
		goto dma_error;

	/* record length, and DMA address */
	first->length = size;
	first->dma = dma;
	tx_desc->read.buffer_addr = cpu_to_le64(dma);

	for (;;) {
		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
			tx_desc->read.cmd_type_len =
				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);

			i++;
			tx_desc++;
			if (i == tx_ring->count) {
				tx_desc = IGB_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += IGB_MAX_DATA_PER_TXD;
			size -= IGB_MAX_DATA_PER_TXD;

			tx_desc->read.olinfo_status = 0;
			tx_desc->read.buffer_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;

		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);

		i++;
		tx_desc++;
		if (i == tx_ring->count) {
			tx_desc = IGB_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
				       size, DMA_TO_DEVICE);
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		tx_buffer_info = &tx_ring->tx_buffer_info[i];
		tx_buffer_info->length = size;
		tx_buffer_info->dma = dma;

		tx_desc->read.olinfo_status = 0;
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		frag++;
	}

	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

	/* write last descriptor with RS and EOP bits */
	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
	tx_desc->read.cmd_type_len = cmd_type;

	/* set the timestamp */
	first->time_stamp = jiffies;

	/*
	 * Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.  (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	writel(i, tx_ring->tail);

	/* we need this if more than one processor can write to our tail
	 * at a time, it synchronizes IO on IA64/Altix systems */
	mmiowb();

	return;

dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer_info map */
	for (;;) {
		tx_buffer_info = &tx_ring->tx_buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
		if (tx_buffer_info == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}

static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Herbert's original patch had:
	 *  smp_mb__after_netif_stop_queue();
	 * but since that doesn't exist yet, just open code it. */
	smp_mb();

	/* We need to check again in case another CPU has just
	 * made room available. */
	if (igb_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
{
	if (igb_desc_unused(tx_ring) >= size)
		return 0;
	return __igb_maybe_stop_tx(tx_ring, size);
}

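/*
 * The stop-then-recheck sequence above closes a race with the cleanup
 * path: between the free-descriptor check and netif_stop_subqueue()
 * another CPU may complete transmits and try to wake the queue, so the
 * queue is stopped first, a full barrier is issued, and the count is
 * tested again before the stop is allowed to stand.
 */
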
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
				struct igb_ring *tx_ring)
{
	struct igb_tx_buffer *first;
	int tso;
	u32 tx_flags = 0;
	__be16 protocol = vlan_get_protocol(skb);
	u8 hdr_len = 0;

	/* need: 1 descriptor per page,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for skb->data,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time */
	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
		/* this is a hard error */
		return NETDEV_TX_BUSY;
	}

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
	first->skb = skb;
	first->bytecount = skb->len;
	first->gso_segs = 1;

	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		tx_flags |= IGB_TX_FLAGS_TSTAMP;
	}

	if (vlan_tx_tag_present(skb)) {
		tx_flags |= IGB_TX_FLAGS_VLAN;
		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
	}

	/* record initial flags and protocol */
	first->tx_flags = tx_flags;
	first->protocol = protocol;

	tso = igb_tso(tx_ring, first, &hdr_len);
	if (tso < 0)
		goto out_drop;
	else if (!tso)
		igb_tx_csum(tx_ring, first);

	igb_tx_map(tx_ring, first, hdr_len);

	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);

	return NETDEV_TX_OK;

out_drop:
	igb_unmap_and_free_tx_resource(tx_ring, first);

	return NETDEV_TX_OK;
}

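/*
 * Example of the descriptor budget used above (added): a fully
 * fragmented skb needs one descriptor per frag, one for skb->data, one
 * for a possible context descriptor and a two-descriptor gap, hence
 * the "nr_frags + 4" reservation.  The count is an estimate rather
 * than exact; a very large linear buffer can consume additional
 * descriptors through the 32K split in igb_tx_map().
 */
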
static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
						    struct sk_buff *skb)
{
	unsigned int r_idx = skb->queue_mapping;

	if (r_idx >= adapter->num_tx_queues)
		r_idx = r_idx % adapter->num_tx_queues;

	return adapter->tx_ring[r_idx];
}

static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/*
	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
	 * in order to meet this minimum size requirement.
	 */
	if (skb->len < 17) {
		if (skb_padto(skb, 17))
			return NETDEV_TX_OK;
		skb->len = 17;
	}

	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
}

/**
 * igb_tx_timeout - Respond to a Tx Hang
 * @netdev: network interface device structure
 **/
static void igb_tx_timeout(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* Do the reset outside of interrupt context */
	adapter->tx_timeout_count++;

	if (hw->mac.type >= e1000_82580)
		hw->dev_spec._82575.global_device_reset = true;

	schedule_work(&adapter->reset_task);
	wr32(E1000_EICS,
	     (adapter->eims_enable_mask & ~adapter->eims_other));
}

static void igb_reset_task(struct work_struct *work)
{
	struct igb_adapter *adapter;
	adapter = container_of(work, struct igb_adapter, reset_task);

	netdev_err(adapter->netdev, "Reset adapter\n");
	igb_reinit_locked(adapter);
}

/**
 * igb_get_stats64 - Get System Network Statistics
 * @netdev: network interface device structure
 * @stats: rtnl_link_stats64 pointer
 **/
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
						 struct rtnl_link_stats64 *stats)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	memcpy(stats, &adapter->stats64, sizeof(*stats));
	spin_unlock(&adapter->stats64_lock);

	return stats;
}

/**
 * igb_change_mtu - Change the Maximum Transfer Unit
 * @netdev: network interface device structure
 * @new_mtu: new value for maximum frame size
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;

	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
		dev_err(&pdev->dev, "Invalid MTU setting\n");
		return -EINVAL;
	}

#define MAX_STD_JUMBO_FRAME_SIZE 9238
	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
		return -EINVAL;
	}

	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);

	/* igb_down has a dependency on max_frame_size */
	adapter->max_frame_size = max_frame;

	if (netif_running(netdev))
		igb_down(adapter);

	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
		 netdev->mtu, new_mtu);
	netdev->mtu = new_mtu;

	if (netif_running(netdev))
		igb_up(adapter);
	else
		igb_reset(adapter);

	clear_bit(__IGB_RESETTING, &adapter->state);

	return 0;
}

/**
 * igb_update_stats - Update the board statistics counters
 * @adapter: board private structure
 **/
void igb_update_stats(struct igb_adapter *adapter,
		      struct rtnl_link_stats64 *net_stats)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u32 reg, mpc;
	u16 phy_tmp;
	int i;
	u64 bytes, packets;
	unsigned int start;
	u64 _bytes, _packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_rx_queues; i++) {
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		struct igb_ring *ring = adapter->rx_ring[i];

		ring->rx_stats.drops += rqdpc_tmp;
		net_stats->rx_fifo_errors += rqdpc_tmp;

		do {
			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
			_bytes = ring->rx_stats.bytes;
			_packets = ring->rx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}

	net_stats->rx_bytes = bytes;
	net_stats->rx_packets = packets;

	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = adapter->tx_ring[i];
		do {
			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
			_bytes = ring->tx_stats.bytes;
			_packets = ring->tx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}
	net_stats->tx_bytes = bytes;
	net_stats->tx_packets = packets;

	/* read stats registers */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	mpc = rd32(E1000_MPC);
	adapter->stats.mpc += mpc;
	net_stats->rx_fifo_errors += mpc;
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	adapter->stats.rnbc += rd32(E1000_RNBC);
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	adapter->stats.tpt += rd32(E1000_TPT);
	adapter->stats.colc += rd32(E1000_COLC);

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	/* read internal phy specific stats */
	reg = rd32(E1000_CTRL_EXT);
	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
		adapter->stats.rxerrc += rd32(E1000_RXERRC);
		adapter->stats.tncrs += rd32(E1000_TNCRS);
	}

	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	net_stats->multicast = adapter->stats.mprc;
	net_stats->collisions = adapter->stats.colc;

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	net_stats->rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	net_stats->rx_length_errors = adapter->stats.ruc +
				      adapter->stats.roc;
	net_stats->rx_crc_errors = adapter->stats.crcerrs;
	net_stats->rx_frame_errors = adapter->stats.algnerrc;
	net_stats->rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	net_stats->tx_errors = adapter->stats.ecol +
			       adapter->stats.latecol;
	net_stats->tx_aborted_errors = adapter->stats.ecol;
	net_stats->tx_window_errors = adapter->stats.latecol;
	net_stats->tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats */
	if (hw->phy.media_type == e1000_media_type_copper) {
		if ((adapter->link_speed == SPEED_1000) &&
		    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);

	/* OS2BMC Stats */
	reg = rd32(E1000_MANC);
	if (reg & E1000_MANC_EN_BMC2OS) {
		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
	}
}

static irqreturn_t igb_msix_other(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct e1000_hw *hw = &adapter->hw;
	u32 icr = rd32(E1000_ICR);
	/* reading ICR causes bit 31 of EICR to be cleared */

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
		/* The DMA Out of Sync is also indication of a spoof event
		 * in IOV mode. Check the Wrong VM Behavior register to
		 * see if it is really a spoof event. */
		igb_check_wvbr(adapter);
	}

	/* Check for a mailbox event */
	if (icr & E1000_ICR_VMMB)
		igb_msg_task(adapter);

	if (icr & E1000_ICR_LSC) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	wr32(E1000_EIMS, adapter->eims_other);

	return IRQ_HANDLED;
}

static void igb_write_itr(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	u32 itr_val = q_vector->itr_val & 0x7FFC;

	if (!q_vector->set_itr)
		return;

	if (!itr_val)
		itr_val = 0x4;

	if (adapter->hw.mac.type == e1000_82575)
		itr_val |= itr_val << 16;
	else
		itr_val |= E1000_EITR_CNT_IGNR;

	writel(itr_val, q_vector->itr_register);
	q_vector->set_itr = 0;
}

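/*
 * Note (added, based on a reading of igb_write_itr() above): the 82575
 * appears to expect the interval mirrored into the upper half of EITR,
 * while on 82576 and later the E1000_EITR_CNT_IGNR flag lets the write
 * update the interval without restarting the in-progress countdown.
 */
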
static irqreturn_t igb_msix_ring(int irq, void *data)
{
	struct igb_q_vector *q_vector = data;

	/* Write the ITR value calculated from the previous interrupt. */
	igb_write_itr(q_vector);

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}

#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int cpu = get_cpu();

	if (q_vector->cpu == cpu)
		goto out_no_update;

	if (q_vector->tx.ring) {
		int q = q_vector->tx.ring->reg_idx;
		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_TXCTRL_CPUID_SHIFT;
		}
		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
	}
	if (q_vector->rx.ring) {
		int q = q_vector->rx.ring->reg_idx;
		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_RXCTRL_CPUID_SHIFT;
		}
		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
	}
	q_vector->cpu = cpu;
out_no_update:
	put_cpu();
}

static void igb_setup_dca(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
		return;

	/* Always use CB2 mode, difference is masked in the CB driver. */
	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		adapter->q_vector[i]->cpu = -1;
		igb_update_dca(adapter->q_vector[i]);
	}
}

static int __igb_notify_dca(struct device *dev, void *data)
{
	struct net_device *netdev = dev_get_drvdata(dev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	unsigned long event = *(unsigned long *)data;

	switch (event) {
	case DCA_PROVIDER_ADD:
		/* if already enabled, don't do it again */
		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
			break;
		if (dca_add_requester(dev) == 0) {
			adapter->flags |= IGB_FLAG_DCA_ENABLED;
			dev_info(&pdev->dev, "DCA enabled\n");
			igb_setup_dca(adapter);
			break;
		}
		/* Fall Through since DCA is disabled. */
	case DCA_PROVIDER_REMOVE:
		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
			/* without this a class_device is left
			 * hanging around in the sysfs model */
			dca_remove_requester(dev);
			dev_info(&pdev->dev, "DCA disabled\n");
			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
		}
		break;
	}

	return 0;
}

static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p)
{
	int ret_val;

	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
					 __igb_notify_dca);

	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
}
#endif /* CONFIG_IGB_DCA */

#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf)
{
	unsigned char mac_addr[ETH_ALEN];
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pvfdev;
	unsigned int device_id;
	u16 thisvf_devfn;

	random_ether_addr(mac_addr);
	igb_set_vf_mac(adapter, vf, mac_addr);

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		device_id = IGB_82576_VF_DEV_ID;
		/* VF Stride for 82576 is 2 */
		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
			       (pdev->devfn & 1);
		break;
	case e1000_i350:
		device_id = IGB_I350_VF_DEV_ID;
		/* VF Stride for I350 is 4 */
		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
			       (pdev->devfn & 3);
		break;
	default:
		device_id = 0;
		thisvf_devfn = 0;
		break;
	}

	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
	while (pvfdev) {
		if (pvfdev->devfn == thisvf_devfn)
			break;
		pvfdev = pci_get_device(hw->vendor_id,
					device_id, pvfdev);
	}

	if (pvfdev)
		adapter->vf_data[vf].vfdev = pvfdev;
	else
		dev_err(&pdev->dev,
			"Couldn't find pci dev ptr for VF %4.4x\n",
			thisvf_devfn);
	return pvfdev != NULL;
}

static int igb_find_enabled_vfs(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	struct pci_dev *pvfdev;
	u16 vf_devfn = 0;
	u16 vf_stride;
	unsigned int device_id;
	int vfs_found = 0;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		device_id = IGB_82576_VF_DEV_ID;
		/* VF Stride for 82576 is 2 */
		vf_stride = 2;
		break;
	case e1000_i350:
		device_id = IGB_I350_VF_DEV_ID;
		/* VF Stride for I350 is 4 */
		vf_stride = 4;
		break;
	default:
		device_id = 0;
		vf_stride = 0;
		break;
	}

	vf_devfn = pdev->devfn + 0x80;
	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
	while (pvfdev) {
		if (pvfdev->devfn == vf_devfn &&
		    (pvfdev->bus->number >= pdev->bus->number))
			vfs_found++;
		vf_devfn += vf_stride;
		pvfdev = pci_get_device(hw->vendor_id,
					device_id, pvfdev);
	}

	return vfs_found;
}

static int igb_check_vf_assignment(struct igb_adapter *adapter)
{
	int i;
	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (adapter->vf_data[i].vfdev) {
			if (adapter->vf_data[i].vfdev->dev_flags &
			    PCI_DEV_FLAGS_ASSIGNED)
				return true;
		}
	}
	return false;
}

#endif /* CONFIG_PCI_IOV */

static void igb_ping_all_vfs(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ping;
	int i;

	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
		ping = E1000_PF_CONTROL_MSG;
		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
			ping |= E1000_VT_MSGTYPE_CTS;
		igb_write_mbx(hw, &ping, 1, i);
	}
}

static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr = rd32(E1000_VMOLR(vf));
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
			    IGB_VF_FLAG_MULTI_PROMISC);
	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
		vmolr |= E1000_VMOLR_MPME;
		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
	} else {
		/*
		 * if we have hashes and we are clearing a multicast promisc
		 * flag we need to write the hashes to the MTA as this step
		 * was previously skipped
		 */
		if (vf_data->num_vf_mc_hashes > 30) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			int j;
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
	}

	wr32(E1000_VMOLR(vf), vmolr);

	/* there are flags left unprocessed, likely not supported */
	if (*msgbuf & E1000_VT_MSGINFO_MASK)
		return -EINVAL;

	return 0;
}

static int igb_set_vf_multicasts(struct igb_adapter *adapter,
				 u32 *msgbuf, u32 vf)
{
	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	u16 *hash_list = (u16 *)&msgbuf[1];
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	int i;

	/* salt away the number of multicast addresses assigned
	 * to this VF for later use to restore when the PF multi cast
	 * list changes
	 */
	vf_data->num_vf_mc_hashes = n;

	/* only up to 30 hash values supported */
	if (n > 30)
		n = 30;

	/* store the hashes for later use */
	for (i = 0; i < n; i++)
		vf_data->vf_mc_hashes[i] = hash_list[i];

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);

	return 0;
}

static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data;
	int i, j;

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		u32 vmolr = rd32(E1000_VMOLR(i));
		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

		vf_data = &adapter->vf_data[i];

		if ((vf_data->num_vf_mc_hashes > 30) ||
		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
		wr32(E1000_VMOLR(i), vmolr);
	}
}

static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 pool_mask, reg, vid;
	int i;

	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));

		/* remove the vf from the pool */
		reg &= ~pool_mask;

		/* if pool is empty then remove entry from vfta */
		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
		    (reg & E1000_VLVF_VLANID_ENABLE)) {
			vid = reg & E1000_VLVF_VLANID_MASK;
			igb_vfta_set(hw, vid, false);
			reg = 0;
		}

		wr32(E1000_VLVF(i), reg);
	}

	adapter->vf_data[vf].vlans_enabled = 0;
}

static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg, i;

	/* The vlvf table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return -1;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return -1;

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));
		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (reg & E1000_VLVF_VLANID_MASK))
			break;
	}

	if (add) {
		if (i == E1000_VLVF_ARRAY_SIZE) {
			/* Did not find a matching VLAN ID entry that was
			 * enabled.  Search for a free filter entry, i.e.
			 * one without the enable bit set
			 */
			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
				reg = rd32(E1000_VLVF(i));
				if (!(reg & E1000_VLVF_VLANID_ENABLE))
					break;
			}
		}
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* Found an enabled/available entry */
			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

			/* if !enabled we need to set this up in vfta */
			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
				/* add VID to filter table */
				igb_vfta_set(hw, vid, true);
				reg |= E1000_VLVF_VLANID_ENABLE;
			}
			reg &= ~E1000_VLVF_VLANID_MASK;
			reg |= vid;
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size += 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}

			adapter->vf_data[vf].vlans_enabled++;
		}
	} else {
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* remove vf from the pool */
			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
			/* if pool is empty then remove entry from vfta */
			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
				reg = 0;
				igb_vfta_set(hw, vid, false);
			}
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			adapter->vf_data[vf].vlans_enabled--;
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size -= 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}
		}
	}
	return 0;
}

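/*
 * VLVF entry layout assumed by the code above (per the 82576 register
 * naming): the low bits hold the VLAN ID (E1000_VLVF_VLANID_MASK), a
 * pool-select bit for each VF/PF pool sits at E1000_VLVF_POOLSEL_SHIFT
 * plus the pool index, and the enable bit
 * (E1000_VLVF_VLANID_ENABLE) marks the entry valid.  Clearing the last
 * pool bit is what allows the VID to be dropped from the VFTA.
 */
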
static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;

	if (vid)
		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
	else
		wr32(E1000_VMVIR(vf), 0);
}

static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos)
{
	int err = 0;
	struct igb_adapter *adapter = netdev_priv(netdev);

	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
		return -EINVAL;
	if (vlan || qos) {
		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
		if (err)
			goto out;
		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
		igb_set_vmolr(adapter, vf, !vlan);
		adapter->vf_data[vf].pf_vlan = vlan;
		adapter->vf_data[vf].pf_qos = qos;
		dev_info(&adapter->pdev->dev,
			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
		if (test_bit(__IGB_DOWN, &adapter->state)) {
			dev_warn(&adapter->pdev->dev,
				 "The VF VLAN has been set,"
				 " but the PF device is not up.\n");
			dev_warn(&adapter->pdev->dev,
				 "Bring the PF device up before"
				 " attempting to use the VF device.\n");
		}
	} else {
		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
			     false, vf);
		igb_set_vmvir(adapter, vlan, vf);
		igb_set_vmolr(adapter, vf, true);
		adapter->vf_data[vf].pf_vlan = 0;
		adapter->vf_data[vf].pf_qos = 0;
	}
out:
	return err;
}

static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);

	return igb_vlvf_set(adapter, vid, add, vf);
}

static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
{
	/* clear flags - except flag that indicates PF has set the MAC */
	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
	adapter->vf_data[vf].last_nack = jiffies;

	/* reset offloads to defaults */
	igb_set_vmolr(adapter, vf, true);

	/* reset vlans for device */
	igb_clear_vf_vfta(adapter, vf);
	if (adapter->vf_data[vf].pf_vlan)
		igb_ndo_set_vf_vlan(adapter->netdev, vf,
				    adapter->vf_data[vf].pf_vlan,
				    adapter->vf_data[vf].pf_qos);
	else
		igb_clear_vf_vfta(adapter, vf);

	/* reset multicast table array for vf */
	adapter->vf_data[vf].num_vf_mc_hashes = 0;

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);
}

static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
{
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;

	/* generate a new mac address as we were hotplug removed/added */
	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
		random_ether_addr(vf_mac);

	/* process remaining reset events */
	igb_vf_reset(adapter, vf);
}

static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
	u32 reg, msgbuf[3];
	u8 *addr = (u8 *)(&msgbuf[1]);

	/* process all the same items cleared in a function level reset */
	igb_vf_reset(adapter, vf);

	/* set vf mac address */
	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);

	/* enable transmit and receive for vf */
	reg = rd32(E1000_VFTE);
	wr32(E1000_VFTE, reg | (1 << vf));
	reg = rd32(E1000_VFRE);
	wr32(E1000_VFRE, reg | (1 << vf));

	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;

	/* reply to reset with ack and vf mac address */
	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
	memcpy(addr, vf_mac, 6);
	igb_write_mbx(hw, msgbuf, 3, vf);
}

static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
{
	/*
	 * The VF MAC Address is stored in a packed array of bytes
	 * starting at the second 32 bit word of the msg array
	 */
	unsigned char *addr = (char *)&msg[1];
	int err = -1;

	if (is_valid_ether_addr(addr))
		err = igb_set_vf_mac(adapter, vf, addr);

	return err;
}

static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	u32 msg = E1000_VT_MSGTYPE_NACK;

	/* if device isn't clear to send it shouldn't be reading either */
	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
		igb_write_mbx(hw, &msg, 1, vf);
		vf_data->last_nack = jiffies;
	}
}

static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct pci_dev *pdev = adapter->pdev;
	u32 msgbuf[E1000_VFMAILBOX_SIZE];
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	s32 retval;

	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);

	if (retval) {
		/* if receive failed revoke VF CTS stats and restart init */
		dev_err(&pdev->dev, "Error receiving message from VF\n");
		vf_data->flags &= ~IGB_VF_FLAG_CTS;
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		goto out;
	}

	/* this is a message we already processed, do nothing */
	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
		return;

	/*
	 * until the vf completes a reset it should not be
	 * allowed to start any configuration.
	 */

	if (msgbuf[0] == E1000_VF_RESET) {
		igb_vf_reset_msg(adapter, vf);
		return;
	}

	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		retval = -1;
		goto out;
	}

	switch ((msgbuf[0] & 0xFFFF)) {
	case E1000_VF_SET_MAC_ADDR:
		retval = -EINVAL;
		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
		else
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively "
				 "set MAC address\nReload the VF driver to "
				 "resume operations\n", vf);
		break;
	case E1000_VF_SET_PROMISC:
		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_MULTICAST:
		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_LPE:
		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
		break;
	case E1000_VF_SET_VLAN:
		retval = -1;
		if (vf_data->pf_vlan)
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively "
				 "set VLAN tag\nReload the VF driver to "
				 "resume operations\n", vf);
		else
			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
		break;
	default:
		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
		retval = -1;
		break;
	}

	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
out:
	/* notify the VF of the results of what it sent us */
	if (retval)
		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
	else
		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;

	igb_write_mbx(hw, msgbuf, 1, vf);
}

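/*
 * Mailbox protocol sketch, as implemented above: the PF echoes the VF's
 * request back with E1000_VT_MSGTYPE_ACK or E1000_VT_MSGTYPE_NACK set
 * in word 0, plus E1000_VT_MSGTYPE_CTS once the VF has completed a
 * reset; a VF that sends configuration requests before resetting
 * therefore only ever sees NACKs until it restarts its init sequence.
 */
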
static void igb_msg_task(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vf;

	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
		/* process any reset requests */
		if (!igb_check_for_rst(hw, vf))
			igb_vf_reset_event(adapter, vf);

		/* process any messages pending */
		if (!igb_check_for_msg(hw, vf))
			igb_rcv_msg_from_vf(adapter, vf);

		/* process any acks */
		if (!igb_check_for_ack(hw, vf))
			igb_rcv_ack_from_vf(adapter, vf);
	}
}

/**
 *  igb_set_uta - Set unicast filter table address
 *  @adapter: board private structure
 *
 *  The unicast table address is a register array of 32-bit registers.
 *  The table is meant to be used in a way similar to how the MTA is used
 *  however due to certain limitations in the hardware it is necessary to
 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
 **/
static void igb_set_uta(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* The UTA table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return;

	for (i = 0; i < hw->mac.uta_reg_count; i++)
		array_wr32(E1000_UTA, i, ~0);
}

/**
 * igb_intr_msi - Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 **/
static irqreturn_t igb_intr_msi(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	struct e1000_hw *hw = &adapter->hw;
	/* read ICR disables interrupts using IAM */
	u32 icr = rd32(E1000_ICR);

	igb_write_itr(q_vector);

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}

/**
 * igb_intr - Legacy Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 **/
static irqreturn_t igb_intr(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	struct e1000_hw *hw = &adapter->hw;
	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
	 * need for the IMC write */
	u32 icr = rd32(E1000_ICR);

	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
	 * not set, then the adapter didn't send an interrupt */
	if (!(icr & E1000_ICR_INT_ASSERTED))
		return IRQ_NONE;

	igb_write_itr(q_vector);

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}

static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;

	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
			igb_set_itr(q_vector);
		else
			igb_update_ring_itr(q_vector);
	}

	if (!test_bit(__IGB_DOWN, &adapter->state)) {
		if (adapter->msix_entries)
			wr32(E1000_EIMS, q_vector->eims_value);
		else
			igb_irq_enable(adapter);
	}
}

/**
 * igb_poll - NAPI Rx polling callback
 * @napi: napi polling structure
 * @budget: count of how many packets we should handle
 **/
static int igb_poll(struct napi_struct *napi, int budget)
{
	struct igb_q_vector *q_vector = container_of(napi,
						     struct igb_q_vector,
						     napi);
	bool clean_complete = true;

#ifdef CONFIG_IGB_DCA
	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
		igb_update_dca(q_vector);
#endif
	if (q_vector->tx.ring)
		clean_complete = igb_clean_tx_irq(q_vector);

	if (q_vector->rx.ring)
		clean_complete &= igb_clean_rx_irq(q_vector, budget);

	/* If all work not completed, return budget and keep polling */
	if (!clean_complete)
		return budget;

	/* If not enough Rx work done, exit the polling mode */
	napi_complete(napi);
	igb_ring_irq_enable(q_vector);

	return 0;
}

/**
 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
 * @adapter: board private structure
 * @shhwtstamps: timestamp structure to update
 * @regval: unsigned 64bit system time value.
 *
 * We need to convert the system time value stored in the RX/TXSTMP registers
 * into a hwtstamp which can be used by the upper level timestamping functions
 */
static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
				   struct skb_shared_hwtstamps *shhwtstamps,
				   u64 regval)
{
	u64 ns;

	/*
	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
	 * 24 to match clock shift we setup earlier.
	 */
	if (adapter->hw.mac.type >= e1000_82580)
		regval <<= IGB_82580_TSYNC_SHIFT;

	ns = timecounter_cyc2time(&adapter->clock, regval);
	timecompare_update(&adapter->compare, ns);
	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
	shhwtstamps->hwtstamp = ns_to_ktime(ns);
	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
}

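/*
 * Example (added): with IGB_82580_TSYNC_SHIFT defined as 24 in this
 * driver's headers, a raw 82580 timestamp of 0x00000001 (1 ns at bit 0)
 * becomes 0x01000000 before being run through the timecounter,
 * matching the shift chosen when the cyclecounter was registered.
 */
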
/**
 * igb_tx_hwtstamp - utility function which checks for TX time stamp
 * @q_vector: pointer to q_vector containing needed info
 * @buffer_info: pointer to igb_tx_buffer structure
 *
 * If we were asked to do hardware stamping and such a time stamp is
 * available, then it must have been for this skb here because we
 * allow only one such packet into the queue.
 */
static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
			    struct igb_tx_buffer *buffer_info)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	struct skb_shared_hwtstamps shhwtstamps;
	u64 regval;

	/* if skb does not support hw timestamp or TX stamp not valid exit */
	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
		return;

	regval = rd32(E1000_TXSTMPL);
	regval |= (u64)rd32(E1000_TXSTMPH) << 32;

	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
}

/**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: pointer to q_vector containing needed info
 * returns true if ring is completely cleaned
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct igb_ring *tx_ring = q_vector->tx.ring;
	struct igb_tx_buffer *tx_buffer;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__IGB_DOWN, &adapter->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer_info[i];
	tx_desc = IGB_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	for (; budget; budget--) {
		eop_desc = tx_buffer->next_to_watch;

		/* prevent any other reads prior to eop_desc */
		rmb();

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* retrieve hardware timestamp */
		igb_tx_hwtstamp(q_vector, tx_buffer);

		/* free the skb */
		dev_kfree_skb_any(tx_buffer->skb);
		tx_buffer->skb = NULL;

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 tx_buffer->dma,
				 tx_buffer->length,
				 DMA_TO_DEVICE);

		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer->dma = 0;

			tx_buffer++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (tx_buffer->dma) {
				dma_unmap_page(tx_ring->dev,
					       tx_buffer->dma,
					       tx_buffer->length,
					       DMA_TO_DEVICE);
			}
		}

		/* clear last DMA location */
		tx_buffer->dma = 0;

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}
	}

	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
		struct e1000_hw *hw = &adapter->hw;

		eop_desc = tx_buffer->next_to_watch;

		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
		if (eop_desc &&
		    time_after(jiffies, tx_buffer->time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				"  Tx Queue             <%d>\n"
				"  TDH                  <%x>\n"
				"  TDT                  <%x>\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
				"  next_to_watch        <%p>\n"
				"  jiffies              <%lx>\n"
				"  desc.status          <%x>\n",
				tx_ring->queue_index,
				rd32(E1000_TDH(tx_ring->reg_idx)),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_buffer->time_stamp,
				eop_desc,
				jiffies,
				eop_desc->wb.status);
			netif_stop_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			/* we are about to reset, no point in enabling stuff */
			return true;
		}
	}

	if (unlikely(total_packets &&
		     netif_carrier_ok(tx_ring->netdev) &&
		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);
		}
	}

	return !!budget;
}

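/*
 * A note on the index arithmetic in igb_clean_tx_irq() above: i is
 * biased by -tx_ring->count so that the wrap test inside the hot loop
 * is simply "if (unlikely(!i))" rather than a compare against the ring
 * size; the bias is added back before next_to_clean is stored.
 */
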
static inline void igb_rx_checksum(struct igb_ring *ring,
				   union e1000_adv_rx_desc *rx_desc,
				   struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* Ignore Checksum bit is set */
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
		return;

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (igb_test_staterr(rx_desc,
			     E1000_RXDEXT_STATERR_TCPE |
			     E1000_RXDEXT_STATERR_IPE)) {
		/*
		 * work around errata with sctp packets where the TCPE aka
		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
		 * packets, (aka let the stack check the crc32c)
		 */
		if (!((skb->len == 60) &&
		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
			u64_stats_update_begin(&ring->rx_syncp);
			ring->rx_stats.csum_err++;
			u64_stats_update_end(&ring->rx_syncp);
		}
		/* let the stack verify checksum errors */
		return;
	}
	/* It must be a TCP or UDP packet with a valid checksum */
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
				      E1000_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	dev_dbg(ring->dev, "cksum success: bits %08X\n",
		le32_to_cpu(rx_desc->wb.upper.status_error));
}

static inline void igb_rx_hash(struct igb_ring *ring,
			       union e1000_adv_rx_desc *rx_desc,
			       struct sk_buff *skb)
{
	if (ring->netdev->features & NETIF_F_RXHASH)
		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
}

static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
			    union e1000_adv_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u64 regval;

	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
				       E1000_RXDADV_STAT_TS))
		return;

	/*
	 * If this bit is set, then the RX registers contain the time stamp. No
	 * other packet will be time stamped until we read these registers, so
	 * read the registers to make them available again. Because only one
	 * packet can be time stamped at a time, we know that the register
	 * values must belong to this one here and therefore we don't need to
	 * compare any of the additional attributes stored for it.
	 *
	 * If nothing went wrong, then it should have a shared tx_flags that we
	 * can turn into a skb_shared_hwtstamps.
	 */
	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
		u32 *stamp = (u32 *)skb->data;
		regval = le32_to_cpu(*(stamp + 2));
		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
		skb_pull(skb, IGB_TS_HDR_LEN);
	} else {
		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
			return;

		regval = rd32(E1000_RXSTMPL);
		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
	}

	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
}

static void igb_rx_vlan(struct igb_ring *ring,
			union e1000_adv_rx_desc *rx_desc,
			struct sk_buff *skb)
{
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
		u16 vid;
		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
		else
			vid = le16_to_cpu(rx_desc->wb.upper.vlan);

		__vlan_hwaccel_put_tag(skb, vid);
	}
}

static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
{
	/* HW will not DMA in data larger than the given buffer, even if it
	 * parses the (NFS, of course) header to be larger.  In that case, it
	 * fills the header buffer and spills the rest into the page.
	 */
	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
		    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
	if (hlen > IGB_RX_HDR_LEN)
		hlen = IGB_RX_HDR_LEN;
	return hlen;
}

static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
{
	struct igb_ring *rx_ring = q_vector->rx.ring;
	union e1000_adv_rx_desc *rx_desc;
	const int current_node = numa_node_id();
	unsigned int total_bytes = 0, total_packets = 0;
	u16 cleaned_count = igb_desc_unused(rx_ring);
	u16 i = rx_ring->next_to_clean;

	rx_desc = IGB_RX_DESC(rx_ring, i);

	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
		struct sk_buff *skb = buffer_info->skb;
		union e1000_adv_rx_desc *next_rxd;

		buffer_info->skb = NULL;
		prefetch(skb->data);

		i++;
		if (i == rx_ring->count)
			i = 0;

		next_rxd = IGB_RX_DESC(rx_ring, i);
		prefetch(next_rxd);

		/*
		 * This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * RXD_STAT_DD bit is set
		 */
		rmb();

		if (!skb_is_nonlinear(skb)) {
			__skb_put(skb, igb_get_hlen(rx_desc));
			dma_unmap_single(rx_ring->dev, buffer_info->dma,
					 IGB_RX_HDR_LEN,
					 DMA_FROM_DEVICE);
			buffer_info->dma = 0;
		}

		if (rx_desc->wb.upper.length) {
			u16 length = le16_to_cpu(rx_desc->wb.upper.length);

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   buffer_info->page,
					   buffer_info->page_offset,
					   length);

			skb->len += length;
			skb->data_len += length;
			skb->truesize += PAGE_SIZE / 2;

			if ((page_count(buffer_info->page) != 1) ||
			    (page_to_nid(buffer_info->page) != current_node))
				buffer_info->page = NULL;
			else
				get_page(buffer_info->page);

			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;
		}

		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
			struct igb_rx_buffer *next_buffer;
			next_buffer = &rx_ring->rx_buffer_info[i];
			buffer_info->skb = next_buffer->skb;
			buffer_info->dma = next_buffer->dma;
			next_buffer->skb = skb;
			next_buffer->dma = 0;
			goto next_desc;
		}

		if (igb_test_staterr(rx_desc,
				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
			dev_kfree_skb_any(skb);
			goto next_desc;
		}

		igb_rx_hwtstamp(q_vector, rx_desc, skb);
		igb_rx_hash(rx_ring, rx_desc, skb);
		igb_rx_checksum(rx_ring, rx_desc, skb);
		igb_rx_vlan(rx_ring, rx_desc, skb);

		total_bytes += skb->len;
		total_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		napi_gro_receive(&q_vector->napi, skb);

		budget--;
next_desc:
		if (!budget)
			break;

		cleaned_count++;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* use prefetched values */
		rx_desc = next_rxd;
	}

	rx_ring->next_to_clean = i;
	u64_stats_update_begin(&rx_ring->rx_syncp);
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->rx_syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

	if (cleaned_count)
		igb_alloc_rx_buffers(rx_ring, cleaned_count);

	return !!budget;
}

static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
				 struct igb_rx_buffer *bi)
{
	struct sk_buff *skb = bi->skb;
	dma_addr_t dma = bi->dma;

	if (dma)
		return true;

	if (likely(!skb)) {
		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
						IGB_RX_HDR_LEN);
		bi->skb = skb;
		if (!skb) {
			rx_ring->rx_stats.alloc_failed++;
			return false;
		}

		/* initialize skb for ring */
		skb_record_rx_queue(skb, rx_ring->queue_index);
	}

	dma = dma_map_single(rx_ring->dev, skb->data,
			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);

	if (dma_mapping_error(rx_ring->dev, dma)) {
		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

	bi->dma = dma;
	return true;
}

static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
				  struct igb_rx_buffer *bi)
{
	struct page *page = bi->page;
	dma_addr_t page_dma = bi->page_dma;
	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);

	if (page_dma)
		return true;

	if (!page) {
		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
		bi->page = page;
		if (unlikely(!page)) {
			rx_ring->rx_stats.alloc_failed++;
			return false;
		}
	}

	page_dma = dma_map_page(rx_ring->dev, page,
				page_offset, PAGE_SIZE / 2,
				DMA_FROM_DEVICE);

	if (dma_mapping_error(rx_ring->dev, page_dma)) {
		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

	bi->page_dma = page_dma;
	bi->page_offset = page_offset;
	return true;
}

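/*
 * The "bi->page_offset ^ (PAGE_SIZE / 2)" above alternates between the
 * two halves of the same page: with 4K pages a buffer's offset flips
 * between 0 and 2048 on each remap, so a single page can back two
 * receive buffers before it has to be released.
 */
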
/**
 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
 * @adapter: address of board private structure
 **/
void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
{
	union e1000_adv_rx_desc *rx_desc;
	struct igb_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;

	rx_desc = IGB_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer_info[i];
	i -= rx_ring->count;

	while (cleaned_count--) {
		if (!igb_alloc_mapped_skb(rx_ring, bi))
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info. */
		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);

		if (!igb_alloc_mapped_page(rx_ring, bi))
			break;

		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);

		rx_desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			rx_desc = IGB_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer_info;
			i -= rx_ring->count;
		}

		/* clear the hdr_addr for the next_to_use descriptor */
		rx_desc->read.hdr_addr = 0;
	}

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64). */
		wmb();
		writel(i, rx_ring->tail);
	}
}

static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct mii_ioctl_data *data = if_mii(ifr);

	if (adapter->hw.phy.media_type != e1000_media_type_copper)
		return -EOPNOTSUPP;

	switch (cmd) {
	case SIOCGMIIPHY:
		data->phy_id = adapter->hw.phy.addr;
		break;
	case SIOCGMIIREG:
		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
				     &data->val_out))
			return -EIO;
		break;
	case SIOCSMIIREG:
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}
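
/*
 * Illustrative user-space sketch (not part of the driver): reading the PHY
 * status register through the handler above, mii-tool style. "eth0" is a
 * placeholder interface name and error handling is omitted.
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	struct ifreq ifr = { 0 };
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCGMIIPHY, &ifr);	-- PHY address lands in mii->phy_id
 *	mii->reg_num = MII_BMSR;	-- status register (register 1)
 *	ioctl(fd, SIOCGMIIREG, &ifr);	-- value returned in mii->val_out
 */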
/**
 * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: network interface device structure
 * @ifr: interface request data
 * @cmd: ioctl command
 *
 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
 * when no packet needs it. At most one packet in the queue may be
 * marked for time stamping, otherwise it would be impossible to tell
 * for sure to which packet the hardware time stamp belongs.
 *
 * Incoming time stamping has to be configured via the hardware
 * filters. Not all combinations are supported, in particular event
 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 * level 2 or 4".
 **/
static int igb_hwtstamp_ioctl(struct net_device *netdev,
			      struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct hwtstamp_config config;
	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
	u32 tsync_rx_cfg = 0;
	bool is_l4 = false;
	bool is_l2 = false;
	u32 regval;

	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
		return -EFAULT;

	/* reserved for future extensions */
	if (config.flags)
		return -EINVAL;

	switch (config.tx_type) {
	case HWTSTAMP_TX_OFF:
		tsync_tx_ctl = 0;
		/* fall through */
	case HWTSTAMP_TX_ON:
		break;
	default:
		return -ERANGE;
	}

	switch (config.rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		tsync_rx_ctl = 0;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_ALL:
		/*
		 * register TSYNCRXCFG must be set, therefore it is not
		 * possible to time stamp both Sync and Delay_Req messages
		 * => fall back to time stamping all packets
		 */
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
		config.rx_filter = HWTSTAMP_FILTER_ALL;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
		is_l2 = true;
		is_l4 = true;
		break;
	default:
		return -ERANGE;
	}

	if (hw->mac.type == e1000_82575) {
		if (tsync_rx_ctl | tsync_tx_ctl)
			return -EINVAL;
		return 0;
	}

	/*
	 * Per-packet timestamping only works if all packets are
	 * timestamped, so enable timestamping in all packets as
	 * long as one rx filter was configured.
	 */
	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
	}

	/* enable/disable TX */
	regval = rd32(E1000_TSYNCTXCTL);
	regval &= ~E1000_TSYNCTXCTL_ENABLED;
	regval |= tsync_tx_ctl;
	wr32(E1000_TSYNCTXCTL, regval);

	/* enable/disable RX */
	regval = rd32(E1000_TSYNCRXCTL);
	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
	regval |= tsync_rx_ctl;
	wr32(E1000_TSYNCRXCTL, regval);

	/* define which PTP packets are time stamped */
	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);

	/* define ethertype filter for timestamped packets */
	if (is_l2)
		wr32(E1000_ETQF(3),
		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
		      E1000_ETQF_1588 |          /* enable timestamping */
		      ETH_P_1588));              /* 1588 eth protocol type */
	else
		wr32(E1000_ETQF(3), 0);

#define PTP_PORT 319
	/* L4 Queue Filter[3]: filter by destination port and protocol */
	if (is_l4) {
		u32 ftqf = (IPPROTO_UDP /* UDP */
			| E1000_FTQF_VF_BP /* VF not compared */
			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
			| E1000_FTQF_MASK); /* mask all inputs */
		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */

		wr32(E1000_IMIR(3), htons(PTP_PORT));
		wr32(E1000_IMIREXT(3),
		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
		if (hw->mac.type == e1000_82576) {
			/* enable source port check */
			wr32(E1000_SPQF(3), htons(PTP_PORT));
			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
		}
		wr32(E1000_FTQF(3), ftqf);
	} else {
		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
	}
	wrfl();

	adapter->hwtstamp_config = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = rd32(E1000_TXSTMPH);
	regval = rd32(E1000_RXSTMPH);

	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}
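
/*
 * Illustrative user-space sketch (not part of the driver): requesting
 * transmit timestamps and PTPv2 event receive filtering through the handler
 * above. "eth0" is a placeholder interface name; error checks are omitted.
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	struct hwtstamp_config cfg = { 0 };
 *	struct ifreq ifr = { 0 };
 *
 *	cfg.tx_type = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return, cfg.rx_filter reports the filter the hardware actually
 * enabled, which may be broader than the one requested.
 */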
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		return igb_mii_ioctl(netdev, ifr, cmd);
	case SIOCSHWTSTAMP:
		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
	default:
		return -EOPNOTSUPP;
	}
}
s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = adapter->pdev->pcie_cap;
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_read_config_word(adapter->pdev, cap_offset + reg, value);

	return 0;
}

s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = adapter->pdev->pcie_cap;
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);

	return 0;
}
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;
	bool enable = !!(features & NETIF_F_HW_VLAN_RX);

	if (enable) {
		/* enable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);

		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl &= ~E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);
	}

	igb_rlpml_set(adapter);
}
static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;

	/* attempt to add filter to vlvf array */
	igb_vlvf_set(adapter, vid, true, pf_id);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(hw, vid, true);

	set_bit(vid, adapter->active_vlans);

	return 0;
}
static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);

	clear_bit(vid, adapter->active_vlans);

	return 0;
}
static void igb_restore_vlan(struct igb_adapter *adapter)
{
	u16 vid;

	igb_vlan_mode(adapter->netdev, adapter->netdev->features);

	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
		igb_vlan_rx_add_vid(adapter->netdev, vid);
}
int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;

	mac->autoneg = 0;

	/* Make sure dplx is at most 1 bit and lsb of speed is not set
	 * for the switch() below to work */
	if ((spd & 1) || (dplx & ~1))
		goto err_inval;

	/* Fiber NICs only allow 1000 Mbps Full duplex */
	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
	    spd != SPEED_1000 &&
	    dplx != DUPLEX_FULL)
		goto err_inval;

	switch (spd + dplx) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		goto err_inval;
	}
	return 0;

err_inval:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
			  bool runtime)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
	int retval;

	netif_device_detach(netdev);

	if (netif_running(netdev))
		__igb_close(netdev, true);

	igb_clear_interrupt_scheme(adapter);

	retval = pci_save_state(pdev);
	if (retval)
		return retval;

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *dev)
{
	int retval;
	bool wake;
	struct pci_dev *pdev = to_pci_dev(dev);

	retval = __igb_shutdown(pdev, &wake, 0);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}
#endif /* CONFIG_PM_SLEEP */
static int igb_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (!rtnl_is_locked()) {
		/*
		 * shut up ASSERT_RTNL() warning in
		 * netif_set_real_num_tx/rx_queues.
		 */
		rtnl_lock();
		err = igb_init_interrupt_scheme(adapter);
		rtnl_unlock();
	} else {
		err = igb_init_interrupt_scheme(adapter);
	}
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netdev->flags & IFF_UP) {
		err = __igb_open(netdev, true);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_idle(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (!igb_has_link(adapter))
		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);

	return -EBUSY;
}

static int igb_runtime_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake, 1);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}

static int igb_runtime_resume(struct device *dev)
{
	return igb_resume(dev);
}
#endif /* CONFIG_PM_RUNTIME */
static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake, 0);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct igb_q_vector *q_vector;
	int i;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		q_vector = adapter->q_vector[i];
		if (adapter->msix_entries)
			wr32(E1000_EIMC, q_vector->eims_value);
		else
			igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
			"failed 0x%0x\n", err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second-half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at end of receive addresses and move
	 * towards the first, as a result a collision should not be possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
		 " change effective.");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
			 " but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
			 " attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}
static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
			       E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	wr32(E1000_RTTBCNRC, bcnrc_val);
}
static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit "
			 "rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 dmac_thr;
	u16 hwm;

	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/* force threshold to 0. */
			wr32(E1000_DMCTXTH, 0);

			/*
			 * DMA Coalescing high water mark needs to be greater
			 * than the Rx threshold. Set hwm to PBA - max frame
			 * size in 16B units, capping it at PBA - 6KB.
			 */
			hwm = 64 * pba - adapter->max_frame_size / 16;
			if (hwm < 64 * (pba - 6))
				hwm = 64 * (pba - 6);
			reg = rd32(E1000_FCRTC);
			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
				& E1000_FCRTC_RTH_COAL_MASK);
			wr32(E1000_FCRTC, reg);

			/*
			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
			 * frame size, capping it at PBA - 10KB.
			 */
			dmac_thr = pba - adapter->max_frame_size / 512;
			if (dmac_thr < pba - 10)
				dmac_thr = pba - 10;
			reg = rd32(E1000_DMACR);
			reg &= ~E1000_DMACR_DMACTHR_MASK;
			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
				& E1000_DMACR_DMACTHR_MASK);

			/* transition to L0s or L1 if available */
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer = 1000 usec, in 32 usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);

			/*
			 * no lower threshold to disable
			 * coalescing (smart FIFO) - UTRESH=0
			 */
			wr32(E1000_DMCRTRH, 0);

			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);

			wr32(E1000_DMCTLX, reg);

			/*
			 * free space in tx packet buffer to wake from
			 * DMA coalescing
			 */
			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);

			/*
			 * make low power state decision controlled
			 * by DMA coalescing
			 */
			reg = rd32(E1000_PCIEMISC);
			reg &= ~E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* endif adapter->dmac is not disabled */
	} else if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
		wr32(E1000_DMACR, 0);
	}
}