]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - drivers/net/ethernet/intel/igb/igb_main.c
e96cef89f121cccec5189480f6def090916981d5
[mirror_ubuntu-artful-kernel.git] / drivers / net / ethernet / intel / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2012 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
61
/* Driver version components; DRV_VERSION joins them as "MAJ.MIN.BUILD-k". */
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
/* Driver name and version strings; non-static, so they have external linkage. */
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
/* Banner and copyright strings printed by igb_init_module(). */
69 static const char igb_driver_string[] =
70 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
/*
 * Board-type index -> e1000_info descriptor.  Only board_82575 is populated.
 */
73 static const struct e1000_info *igb_info_tbl[] = {
74 [board_82575] = &e1000_82575_info,
75 };
76
/*
 * PCI device IDs handled by this driver (I350, 82580, DH89XXCC, 82576 and
 * 82575 parts).  The trailing value of each entry is the board-type index;
 * every entry here uses board_82575.  The table ends with an all-zero entry.
 */
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103 /* required last entry */
104 {0, }
105 };
106
/* Publish the ID table in the module's device-table metadata. */
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108
/*
 * Forward declarations, so the driver tables and routines in this file can
 * reference these functions before their definitions appear.  igb_reset() is
 * the only one declared without static.
 */
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133 struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
/* Interrupt handlers: legacy, MSI, and the two MSI-X flavours. */
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
/* Virtual-function (VF) management helpers. */
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163 int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166 struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
168
/* Declared only when the kernel is built with CONFIG_PCI_IOV. */
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
174
/*
 * Power-management callbacks and the dev_pm_ops table that igb_driver points
 * at.  Everything here is compiled only with CONFIG_PM; the suspend and
 * runtime callbacks are further gated by CONFIG_PM_SLEEP / CONFIG_PM_RUNTIME.
 */
175 #ifdef CONFIG_PM
176 #ifdef CONFIG_PM_SLEEP
177 static int igb_suspend(struct device *);
178 #endif
179 static int igb_resume(struct device *);
180 #ifdef CONFIG_PM_RUNTIME
181 static int igb_runtime_suspend(struct device *dev);
182 static int igb_runtime_resume(struct device *dev);
183 static int igb_runtime_idle(struct device *dev);
184 #endif
185 static const struct dev_pm_ops igb_pm_ops = {
186 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
187 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
188 igb_runtime_idle)
189 };
190 #endif
191 static void igb_shutdown(struct pci_dev *);
/* DCA notifier block; registered/unregistered by the module init/exit hooks. */
192 #ifdef CONFIG_IGB_DCA
193 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
194 static struct notifier_block dca_notifier = {
195 .notifier_call = igb_notify_dca,
196 .next = NULL,
197 .priority = 0
198 };
199 #endif
200 #ifdef CONFIG_NET_POLL_CONTROLLER
201 /* for netdump / net console */
202 static void igb_netpoll(struct net_device *);
203 #endif
/*
 * max_vfs module parameter (default 0).  Permission mask 0 is passed to
 * module_param(); only available with CONFIG_PCI_IOV.
 */
204 #ifdef CONFIG_PCI_IOV
205 static unsigned int max_vfs = 0;
206 module_param(max_vfs, uint, 0);
207 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
208 "per physical function");
209 #endif /* CONFIG_PCI_IOV */
210
/* PCI error-recovery callbacks and the handler table igb_driver points at. */
211 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212 pci_channel_state_t);
213 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214 static void igb_io_resume(struct pci_dev *);
215
216 static struct pci_error_handlers igb_err_handler = {
217 .error_detected = igb_io_error_detected,
218 .slot_reset = igb_io_slot_reset,
219 .resume = igb_io_resume,
220 };
221
222 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
223
/*
 * PCI driver descriptor handed to pci_register_driver() in igb_init_module().
 * The PM ops pointer is only set when CONFIG_PM is enabled.
 */
224 static struct pci_driver igb_driver = {
225 .name = igb_driver_name,
226 .id_table = igb_pci_tbl,
227 .probe = igb_probe,
228 .remove = __devexit_p(igb_remove),
229 #ifdef CONFIG_PM
230 .driver.pm = &igb_pm_ops,
231 #endif
232 .shutdown = igb_shutdown,
233 .err_handler = &igb_err_handler
234 };
235
/* Module metadata: author, description, license and version string. */
236 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238 MODULE_LICENSE("GPL");
239 MODULE_VERSION(DRV_VERSION);
240
241 struct igb_reg_info {
242 u32 ofs;
243 char *name;
244 };
245
/*
 * Registers printed by igb_dump().  Per-queue registers are listed by their
 * queue-0 offset; igb_regdump() expands those to queues 0-3.  The table is
 * terminated by an empty entry (NULL name).
 */
246 static const struct igb_reg_info igb_reg_info_tbl[] = {
247
248 /* General Registers */
249 {E1000_CTRL, "CTRL"},
250 {E1000_STATUS, "STATUS"},
251 {E1000_CTRL_EXT, "CTRL_EXT"},
252
253 /* Interrupt Registers */
254 {E1000_ICR, "ICR"},
255
256 /* RX Registers */
257 {E1000_RCTL, "RCTL"},
258 {E1000_RDLEN(0), "RDLEN"},
259 {E1000_RDH(0), "RDH"},
260 {E1000_RDT(0), "RDT"},
261 {E1000_RXDCTL(0), "RXDCTL"},
262 {E1000_RDBAL(0), "RDBAL"},
263 {E1000_RDBAH(0), "RDBAH"},
264
265 /* TX Registers */
266 {E1000_TCTL, "TCTL"},
267 {E1000_TDBAL(0), "TDBAL"},
268 {E1000_TDBAH(0), "TDBAH"},
269 {E1000_TDLEN(0), "TDLEN"},
270 {E1000_TDH(0), "TDH"},
271 {E1000_TDT(0), "TDT"},
272 {E1000_TXDCTL(0), "TXDCTL"},
273 {E1000_TDFH, "TDFH"},
274 {E1000_TDFT, "TDFT"},
275 {E1000_TDFHS, "TDFHS"},
276 {E1000_TDFPC, "TDFPC"},
277
278 /* List Terminator */
279 {}
280 };
281
282 /*
283 * igb_regdump - register printout routine
284 */
285 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
286 {
287 int n = 0;
288 char rname[16];
289 u32 regs[8];
290
291 switch (reginfo->ofs) {
292 case E1000_RDLEN(0):
293 for (n = 0; n < 4; n++)
294 regs[n] = rd32(E1000_RDLEN(n));
295 break;
296 case E1000_RDH(0):
297 for (n = 0; n < 4; n++)
298 regs[n] = rd32(E1000_RDH(n));
299 break;
300 case E1000_RDT(0):
301 for (n = 0; n < 4; n++)
302 regs[n] = rd32(E1000_RDT(n));
303 break;
304 case E1000_RXDCTL(0):
305 for (n = 0; n < 4; n++)
306 regs[n] = rd32(E1000_RXDCTL(n));
307 break;
308 case E1000_RDBAL(0):
309 for (n = 0; n < 4; n++)
310 regs[n] = rd32(E1000_RDBAL(n));
311 break;
312 case E1000_RDBAH(0):
313 for (n = 0; n < 4; n++)
314 regs[n] = rd32(E1000_RDBAH(n));
315 break;
316 case E1000_TDBAL(0):
317 for (n = 0; n < 4; n++)
318 regs[n] = rd32(E1000_RDBAL(n));
319 break;
320 case E1000_TDBAH(0):
321 for (n = 0; n < 4; n++)
322 regs[n] = rd32(E1000_TDBAH(n));
323 break;
324 case E1000_TDLEN(0):
325 for (n = 0; n < 4; n++)
326 regs[n] = rd32(E1000_TDLEN(n));
327 break;
328 case E1000_TDH(0):
329 for (n = 0; n < 4; n++)
330 regs[n] = rd32(E1000_TDH(n));
331 break;
332 case E1000_TDT(0):
333 for (n = 0; n < 4; n++)
334 regs[n] = rd32(E1000_TDT(n));
335 break;
336 case E1000_TXDCTL(0):
337 for (n = 0; n < 4; n++)
338 regs[n] = rd32(E1000_TXDCTL(n));
339 break;
340 default:
341 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
342 return;
343 }
344
345 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
346 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
347 regs[2], regs[3]);
348 }
349
350 /*
351 * igb_dump - Print registers, tx-rings and rx-rings
352 */
/*
 * Nothing is printed unless netif_msg_hw() is set for the adapter.  The
 * output is produced in stages, each gated separately:
 *   - net device info (only when adapter->netdev is non-NULL)
 *   - every register listed in igb_reg_info_tbl
 *   - TX ring summary (only when the netdev exists and is running)
 *   - full TX descriptor dump (only with netif_msg_tx_done())
 *   - RX ring summary
 *   - full RX descriptor dump (only with netif_msg_rx_status())
 * Packet buffer contents are hex-dumped only with netif_msg_pktdata().
 */
353 static void igb_dump(struct igb_adapter *adapter)
354 {
355 struct net_device *netdev = adapter->netdev;
356 struct e1000_hw *hw = &adapter->hw;
357 struct igb_reg_info *reginfo;
358 struct igb_ring *tx_ring;
359 union e1000_adv_tx_desc *tx_desc;
/* Overlay used to print a descriptor as two raw 64-bit words. */
360 struct my_u0 { u64 a; u64 b; } *u0;
361 struct igb_ring *rx_ring;
362 union e1000_adv_rx_desc *rx_desc;
363 u32 staterr;
364 u16 i, n;
365
366 if (!netif_msg_hw(adapter))
367 return;
368
369 /* Print netdevice Info */
370 if (netdev) {
371 dev_info(&adapter->pdev->dev, "Net device Info\n");
372 pr_info("Device Name state trans_start "
373 "last_rx\n");
374 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
375 netdev->state, netdev->trans_start, netdev->last_rx);
376 }
377
378 /* Print Registers */
379 dev_info(&adapter->pdev->dev, "Register Dump\n");
380 pr_info(" Register Name Value\n");
/* The cast drops const from the table to match igb_regdump()'s parameter. */
381 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
382 reginfo->name; reginfo++) {
383 igb_regdump(hw, reginfo);
384 }
385
386 /* Print TX Ring Summary */
387 if (!netdev || !netif_running(netdev))
388 goto exit;
389
390 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
391 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
392 for (n = 0; n < adapter->num_tx_queues; n++) {
393 struct igb_tx_buffer *buffer_info;
394 tx_ring = adapter->tx_ring[n];
/* Summary line reports the buffer at next_to_clean. */
395 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
396 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
397 n, tx_ring->next_to_use, tx_ring->next_to_clean,
398 (u64)buffer_info->dma,
399 buffer_info->length,
400 buffer_info->next_to_watch,
401 (u64)buffer_info->time_stamp);
402 }
403
404 /* Print TX Rings */
405 if (!netif_msg_tx_done(adapter))
406 goto rx_ring_summary;
407
408 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
409
410 /* Transmit Descriptor Formats
411 *
412 * Advanced Transmit Descriptor
413 * +--------------------------------------------------------------+
414 * 0 | Buffer Address [63:0] |
415 * +--------------------------------------------------------------+
416 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
417 * +--------------------------------------------------------------+
418 * 63 46 45 40 39 38 36 35 32 31 24 15 0
419 */
420
421 for (n = 0; n < adapter->num_tx_queues; n++) {
422 tx_ring = adapter->tx_ring[n];
423 pr_info("------------------------------------\n");
424 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
425 pr_info("------------------------------------\n");
426 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
427 "[bi->dma ] leng ntw timestamp "
428 "bi->skb\n");
429
/* The loop is skipped entirely when the ring has no descriptor memory. */
430 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
431 const char *next_desc;
432 struct igb_tx_buffer *buffer_info;
433 tx_desc = IGB_TX_DESC(tx_ring, i);
434 buffer_info = &tx_ring->tx_buffer_info[i];
435 u0 = (struct my_u0 *)tx_desc;
/* Tag the descriptor if it is next-to-use and/or next-to-clean. */
436 if (i == tx_ring->next_to_use &&
437 i == tx_ring->next_to_clean)
438 next_desc = " NTC/U";
439 else if (i == tx_ring->next_to_use)
440 next_desc = " NTU";
441 else if (i == tx_ring->next_to_clean)
442 next_desc = " NTC";
443 else
444 next_desc = "";
445
446 pr_info("T [0x%03X] %016llX %016llX %016llX"
447 " %04X %p %016llX %p%s\n", i,
448 le64_to_cpu(u0->a),
449 le64_to_cpu(u0->b),
450 (u64)buffer_info->dma,
451 buffer_info->length,
452 buffer_info->next_to_watch,
453 (u64)buffer_info->time_stamp,
454 buffer_info->skb, next_desc);
455
/* NOTE(review): treats the DMA address as a physical address - confirm this holds on all platforms. */
456 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
457 print_hex_dump(KERN_INFO, "",
458 DUMP_PREFIX_ADDRESS,
459 16, 1, phys_to_virt(buffer_info->dma),
460 buffer_info->length, true);
461 }
462 }
463
464 /* Print RX Rings Summary */
465 rx_ring_summary:
466 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
467 pr_info("Queue [NTU] [NTC]\n");
468 for (n = 0; n < adapter->num_rx_queues; n++) {
469 rx_ring = adapter->rx_ring[n];
470 pr_info(" %5d %5X %5X\n",
471 n, rx_ring->next_to_use, rx_ring->next_to_clean);
472 }
473
474 /* Print RX Rings */
475 if (!netif_msg_rx_status(adapter))
476 goto exit;
477
478 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
479
480 /* Advanced Receive Descriptor (Read) Format
481 * 63 1 0
482 * +-----------------------------------------------------+
483 * 0 | Packet Buffer Address [63:1] |A0/NSE|
484 * +----------------------------------------------+------+
485 * 8 | Header Buffer Address [63:1] | DD |
486 * +-----------------------------------------------------+
487 *
488 *
489 * Advanced Receive Descriptor (Write-Back) Format
490 *
491 * 63 48 47 32 31 30 21 20 17 16 4 3 0
492 * +------------------------------------------------------+
493 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
494 * | Checksum Ident | | | | Type | Type |
495 * +------------------------------------------------------+
496 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
497 * +------------------------------------------------------+
498 * 63 48 47 32 31 20 19 0
499 */
500
501 for (n = 0; n < adapter->num_rx_queues; n++) {
502 rx_ring = adapter->rx_ring[n];
503 pr_info("------------------------------------\n");
504 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
505 pr_info("------------------------------------\n");
506 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
507 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
508 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
509 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
510
/* NOTE(review): unlike the TX loop, rx_ring->desc is not checked here. */
511 for (i = 0; i < rx_ring->count; i++) {
512 const char *next_desc;
513 struct igb_rx_buffer *buffer_info;
514 buffer_info = &rx_ring->rx_buffer_info[i];
515 rx_desc = IGB_RX_DESC(rx_ring, i);
516 u0 = (struct my_u0 *)rx_desc;
517 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
518
519 if (i == rx_ring->next_to_use)
520 next_desc = " NTU";
521 else if (i == rx_ring->next_to_clean)
522 next_desc = " NTC";
523 else
524 next_desc = "";
525
/* The DD status bit selects write-back ("RWB") vs read ("R") format. */
526 if (staterr & E1000_RXD_STAT_DD) {
527 /* Descriptor Done */
528 pr_info("%s[0x%03X] %016llX %016llX -------"
529 "--------- %p%s\n", "RWB", i,
530 le64_to_cpu(u0->a),
531 le64_to_cpu(u0->b),
532 buffer_info->skb, next_desc);
533 } else {
534 pr_info("%s[0x%03X] %016llX %016llX %016llX"
535 " %p%s\n", "R ", i,
536 le64_to_cpu(u0->a),
537 le64_to_cpu(u0->b),
538 (u64)buffer_info->dma,
539 buffer_info->skb, next_desc);
540
/* Dump the header buffer, then half a page from the page buffer. */
541 if (netif_msg_pktdata(adapter)) {
542 print_hex_dump(KERN_INFO, "",
543 DUMP_PREFIX_ADDRESS,
544 16, 1,
545 phys_to_virt(buffer_info->dma),
546 IGB_RX_HDR_LEN, true);
547 print_hex_dump(KERN_INFO, "",
548 DUMP_PREFIX_ADDRESS,
549 16, 1,
550 phys_to_virt(
551 buffer_info->page_dma +
552 buffer_info->page_offset),
553 PAGE_SIZE/2, true);
554 }
555 }
556 }
557 }
558
559 exit:
560 return;
561 }
562
563
564 /**
565 * igb_read_clock - read raw cycle counter (to be used by time counter)
566 */
567 static cycle_t igb_read_clock(const struct cyclecounter *tc)
568 {
569 struct igb_adapter *adapter =
570 container_of(tc, struct igb_adapter, cycles);
571 struct e1000_hw *hw = &adapter->hw;
572 u64 stamp = 0;
573 int shift = 0;
574
575 /*
576 * The timestamp latches on lowest register read. For the 82580
577 * the lowest register is SYSTIMR instead of SYSTIML. However we never
578 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
579 */
580 if (hw->mac.type >= e1000_82580) {
581 stamp = rd32(E1000_SYSTIMR) >> 8;
582 shift = IGB_82580_TSYNC_SHIFT;
583 }
584
585 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
586 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
587 return stamp;
588 }
589
590 /**
591 * igb_get_hw_dev - return device
592 * used by hardware layer to print debugging information
593 **/
594 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
595 {
596 struct igb_adapter *adapter = hw->back;
597 return adapter->netdev;
598 }
599
600 /**
601 * igb_init_module - Driver Registration Routine
602 *
603 * igb_init_module is the first routine called when the driver is
604 * loaded. All it does is register with the PCI subsystem.
605 **/
606 static int __init igb_init_module(void)
607 {
608 int ret;
609 pr_info("%s - version %s\n",
610 igb_driver_string, igb_driver_version);
611
612 pr_info("%s\n", igb_copyright);
613
614 #ifdef CONFIG_IGB_DCA
615 dca_register_notify(&dca_notifier);
616 #endif
617 ret = pci_register_driver(&igb_driver);
618 return ret;
619 }
620
621 module_init(igb_init_module);
622
623 /**
624 * igb_exit_module - Driver Exit Cleanup Routine
625 *
626 * igb_exit_module is called just before the driver is removed
627 * from memory.
628 **/
629 static void __exit igb_exit_module(void)
630 {
631 #ifdef CONFIG_IGB_DCA
632 dca_unregister_notify(&dca_notifier);
633 #endif
634 pci_unregister_driver(&igb_driver);
635 }
636
637 module_exit(igb_exit_module);
638
639 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
640 /**
641 * igb_cache_ring_register - Descriptor ring to register mapping
642 * @adapter: board private structure to initialize
643 *
644 * Once we know the feature-set enabled for the device, we'll cache
645 * the register offset the descriptor ring is assigned to.
646 **/
647 static void igb_cache_ring_register(struct igb_adapter *adapter)
648 {
649 int i = 0, j = 0;
650 u32 rbase_offset = adapter->vfs_allocated_count;
651
652 switch (adapter->hw.mac.type) {
653 case e1000_82576:
654 /* The queues are allocated for virtualization such that VF 0
655 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
656 * In order to avoid collision we start at the first free queue
657 * and continue consuming queues in the same sequence
658 */
659 if (adapter->vfs_allocated_count) {
660 for (; i < adapter->rss_queues; i++)
661 adapter->rx_ring[i]->reg_idx = rbase_offset +
662 Q_IDX_82576(i);
663 }
664 case e1000_82575:
665 case e1000_82580:
666 case e1000_i350:
667 default:
668 for (; i < adapter->num_rx_queues; i++)
669 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
670 for (; j < adapter->num_tx_queues; j++)
671 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
672 break;
673 }
674 }
675
676 static void igb_free_queues(struct igb_adapter *adapter)
677 {
678 int i;
679
680 for (i = 0; i < adapter->num_tx_queues; i++) {
681 kfree(adapter->tx_ring[i]);
682 adapter->tx_ring[i] = NULL;
683 }
684 for (i = 0; i < adapter->num_rx_queues; i++) {
685 kfree(adapter->rx_ring[i]);
686 adapter->rx_ring[i] = NULL;
687 }
688 adapter->num_rx_queues = 0;
689 adapter->num_tx_queues = 0;
690 }
691
692 /**
693 * igb_alloc_queues - Allocate memory for all rings
694 * @adapter: board private structure to initialize
695 *
696 * We allocate one ring per queue at run-time since we don't know the
697 * number of queues at compile-time.
698 **/
699 static int igb_alloc_queues(struct igb_adapter *adapter)
700 {
701 struct igb_ring *ring;
702 int i;
703 int orig_node = adapter->node;
704
705 for (i = 0; i < adapter->num_tx_queues; i++) {
706 if (orig_node == -1) {
707 int cur_node = next_online_node(adapter->node);
708 if (cur_node == MAX_NUMNODES)
709 cur_node = first_online_node;
710 adapter->node = cur_node;
711 }
712 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
713 adapter->node);
714 if (!ring)
715 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
716 if (!ring)
717 goto err;
718 ring->count = adapter->tx_ring_count;
719 ring->queue_index = i;
720 ring->dev = &adapter->pdev->dev;
721 ring->netdev = adapter->netdev;
722 ring->numa_node = adapter->node;
723 /* For 82575, context index must be unique per ring. */
724 if (adapter->hw.mac.type == e1000_82575)
725 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
726 adapter->tx_ring[i] = ring;
727 }
728 /* Restore the adapter's original node */
729 adapter->node = orig_node;
730
731 for (i = 0; i < adapter->num_rx_queues; i++) {
732 if (orig_node == -1) {
733 int cur_node = next_online_node(adapter->node);
734 if (cur_node == MAX_NUMNODES)
735 cur_node = first_online_node;
736 adapter->node = cur_node;
737 }
738 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
739 adapter->node);
740 if (!ring)
741 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
742 if (!ring)
743 goto err;
744 ring->count = adapter->rx_ring_count;
745 ring->queue_index = i;
746 ring->dev = &adapter->pdev->dev;
747 ring->netdev = adapter->netdev;
748 ring->numa_node = adapter->node;
749 /* set flag indicating ring supports SCTP checksum offload */
750 if (adapter->hw.mac.type >= e1000_82576)
751 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
752
753 /* On i350, loopback VLAN packets have the tag byte-swapped. */
754 if (adapter->hw.mac.type == e1000_i350)
755 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
756
757 adapter->rx_ring[i] = ring;
758 }
759 /* Restore the adapter's original node */
760 adapter->node = orig_node;
761
762 igb_cache_ring_register(adapter);
763
764 return 0;
765
766 err:
767 /* Restore the adapter's original node */
768 adapter->node = orig_node;
769 igb_free_queues(adapter);
770
771 return -ENOMEM;
772 }
773
774 /**
775 * igb_write_ivar - configure ivar for given MSI-X vector
776 * @hw: pointer to the HW structure
777 * @msix_vector: vector number we are allocating to a given ring
778 * @index: row index of IVAR register to write within IVAR table
779 * @offset: column offset of in IVAR, should be multiple of 8
780 *
781 * This function is intended to handle the writing of the IVAR register
782 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
783 * each containing an cause allocation for an Rx and Tx ring, and a
784 * variable number of rows depending on the number of queues supported.
785 **/
786 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
787 int index, int offset)
788 {
789 u32 ivar = array_rd32(E1000_IVAR0, index);
790
791 /* clear any bits that are currently set */
792 ivar &= ~((u32)0xFF << offset);
793
794 /* write vector and valid bit */
795 ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
796
797 array_wr32(E1000_IVAR0, index, ivar);
798 }
799
800 #define IGB_N0_QUEUE -1
801 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
802 {
803 struct igb_adapter *adapter = q_vector->adapter;
804 struct e1000_hw *hw = &adapter->hw;
805 int rx_queue = IGB_N0_QUEUE;
806 int tx_queue = IGB_N0_QUEUE;
807 u32 msixbm = 0;
808
809 if (q_vector->rx.ring)
810 rx_queue = q_vector->rx.ring->reg_idx;
811 if (q_vector->tx.ring)
812 tx_queue = q_vector->tx.ring->reg_idx;
813
814 switch (hw->mac.type) {
815 case e1000_82575:
816 /* The 82575 assigns vectors using a bitmask, which matches the
817 bitmask for the EICR/EIMS/EIMC registers. To assign one
818 or more queues to a vector, we write the appropriate bits
819 into the MSIXBM register for that vector. */
820 if (rx_queue > IGB_N0_QUEUE)
821 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
822 if (tx_queue > IGB_N0_QUEUE)
823 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
824 if (!adapter->msix_entries && msix_vector == 0)
825 msixbm |= E1000_EIMS_OTHER;
826 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
827 q_vector->eims_value = msixbm;
828 break;
829 case e1000_82576:
830 /*
831 * 82576 uses a table that essentially consists of 2 columns
832 * with 8 rows. The ordering is column-major so we use the
833 * lower 3 bits as the row index, and the 4th bit as the
834 * column offset.
835 */
836 if (rx_queue > IGB_N0_QUEUE)
837 igb_write_ivar(hw, msix_vector,
838 rx_queue & 0x7,
839 (rx_queue & 0x8) << 1);
840 if (tx_queue > IGB_N0_QUEUE)
841 igb_write_ivar(hw, msix_vector,
842 tx_queue & 0x7,
843 ((tx_queue & 0x8) << 1) + 8);
844 q_vector->eims_value = 1 << msix_vector;
845 break;
846 case e1000_82580:
847 case e1000_i350:
848 /*
849 * On 82580 and newer adapters the scheme is similar to 82576
850 * however instead of ordering column-major we have things
851 * ordered row-major. So we traverse the table by using
852 * bit 0 as the column offset, and the remaining bits as the
853 * row index.
854 */
855 if (rx_queue > IGB_N0_QUEUE)
856 igb_write_ivar(hw, msix_vector,
857 rx_queue >> 1,
858 (rx_queue & 0x1) << 4);
859 if (tx_queue > IGB_N0_QUEUE)
860 igb_write_ivar(hw, msix_vector,
861 tx_queue >> 1,
862 ((tx_queue & 0x1) << 4) + 8);
863 q_vector->eims_value = 1 << msix_vector;
864 break;
865 default:
866 BUG();
867 break;
868 }
869
870 /* add q_vector eims value to global eims_enable_mask */
871 adapter->eims_enable_mask |= q_vector->eims_value;
872
873 /* configure q_vector to set itr on first interrupt */
874 q_vector->set_itr = 1;
875 }
876
877 /**
878 * igb_configure_msix - Configure MSI-X hardware
879 *
880 * igb_configure_msix sets up the hardware to properly
881 * generate MSI-X interrupts.
882 **/
883 static void igb_configure_msix(struct igb_adapter *adapter)
884 {
885 u32 tmp;
886 int i, vector = 0;
887 struct e1000_hw *hw = &adapter->hw;
888
889 adapter->eims_enable_mask = 0;
890
891 /* set vector for other causes, i.e. link changes */
892 switch (hw->mac.type) {
893 case e1000_82575:
894 tmp = rd32(E1000_CTRL_EXT);
895 /* enable MSI-X PBA support*/
896 tmp |= E1000_CTRL_EXT_PBA_CLR;
897
898 /* Auto-Mask interrupts upon ICR read. */
899 tmp |= E1000_CTRL_EXT_EIAME;
900 tmp |= E1000_CTRL_EXT_IRCA;
901
902 wr32(E1000_CTRL_EXT, tmp);
903
904 /* enable msix_other interrupt */
905 array_wr32(E1000_MSIXBM(0), vector++,
906 E1000_EIMS_OTHER);
907 adapter->eims_other = E1000_EIMS_OTHER;
908
909 break;
910
911 case e1000_82576:
912 case e1000_82580:
913 case e1000_i350:
914 /* Turn on MSI-X capability first, or our settings
915 * won't stick. And it will take days to debug. */
916 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
917 E1000_GPIE_PBA | E1000_GPIE_EIAME |
918 E1000_GPIE_NSICR);
919
920 /* enable msix_other interrupt */
921 adapter->eims_other = 1 << vector;
922 tmp = (vector++ | E1000_IVAR_VALID) << 8;
923
924 wr32(E1000_IVAR_MISC, tmp);
925 break;
926 default:
927 /* do nothing, since nothing else supports MSI-X */
928 break;
929 } /* switch (hw->mac.type) */
930
931 adapter->eims_enable_mask |= adapter->eims_other;
932
933 for (i = 0; i < adapter->num_q_vectors; i++)
934 igb_assign_vector(adapter->q_vector[i], vector++);
935
936 wrfl();
937 }
938
939 /**
940 * igb_request_msix - Initialize MSI-X interrupts
941 *
942 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
943 * kernel.
944 **/
945 static int igb_request_msix(struct igb_adapter *adapter)
946 {
947 struct net_device *netdev = adapter->netdev;
948 struct e1000_hw *hw = &adapter->hw;
949 int i, err = 0, vector = 0;
950
951 err = request_irq(adapter->msix_entries[vector].vector,
952 igb_msix_other, 0, netdev->name, adapter);
953 if (err)
954 goto out;
955 vector++;
956
957 for (i = 0; i < adapter->num_q_vectors; i++) {
958 struct igb_q_vector *q_vector = adapter->q_vector[i];
959
960 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
961
962 if (q_vector->rx.ring && q_vector->tx.ring)
963 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
964 q_vector->rx.ring->queue_index);
965 else if (q_vector->tx.ring)
966 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
967 q_vector->tx.ring->queue_index);
968 else if (q_vector->rx.ring)
969 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
970 q_vector->rx.ring->queue_index);
971 else
972 sprintf(q_vector->name, "%s-unused", netdev->name);
973
974 err = request_irq(adapter->msix_entries[vector].vector,
975 igb_msix_ring, 0, q_vector->name,
976 q_vector);
977 if (err)
978 goto out;
979 vector++;
980 }
981
982 igb_configure_msix(adapter);
983 return 0;
984 out:
985 return err;
986 }
987
988 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
989 {
990 if (adapter->msix_entries) {
991 pci_disable_msix(adapter->pdev);
992 kfree(adapter->msix_entries);
993 adapter->msix_entries = NULL;
994 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
995 pci_disable_msi(adapter->pdev);
996 }
997 }
998
999 /**
1000 * igb_free_q_vectors - Free memory allocated for interrupt vectors
1001 * @adapter: board private structure to initialize
1002 *
1003 * This function frees the memory allocated to the q_vectors. In addition if
1004 * NAPI is enabled it will delete any references to the NAPI struct prior
1005 * to freeing the q_vector.
1006 **/
1007 static void igb_free_q_vectors(struct igb_adapter *adapter)
1008 {
1009 int v_idx;
1010
1011 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1012 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1013 adapter->q_vector[v_idx] = NULL;
1014 if (!q_vector)
1015 continue;
1016 netif_napi_del(&q_vector->napi);
1017 kfree(q_vector);
1018 }
1019 adapter->num_q_vectors = 0;
1020 }
1021
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Tears down in order: the queues, the q_vectors, and finally the
 * MSI-X/MSI capability of the PCI device.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* queues first, then the vectors that serviced them */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);

	/* last: drop MSI-X / MSI on the device itself */
	igb_reset_interrupt_capability(adapter);
}
1034
1035 /**
1036 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1037 *
1038 * Attempt to configure interrupts using the best available
1039 * capabilities of the hardware and kernel.
1040 **/
1041 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1042 {
1043 int err;
1044 int numvecs, i;
1045
1046 /* Number of supported queues. */
1047 adapter->num_rx_queues = adapter->rss_queues;
1048 if (adapter->vfs_allocated_count)
1049 adapter->num_tx_queues = 1;
1050 else
1051 adapter->num_tx_queues = adapter->rss_queues;
1052
1053 /* start with one vector for every rx queue */
1054 numvecs = adapter->num_rx_queues;
1055
1056 /* if tx handler is separate add 1 for every tx queue */
1057 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1058 numvecs += adapter->num_tx_queues;
1059
1060 /* store the number of vectors reserved for queues */
1061 adapter->num_q_vectors = numvecs;
1062
1063 /* add 1 vector for link status interrupts */
1064 numvecs++;
1065 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1066 GFP_KERNEL);
1067 if (!adapter->msix_entries)
1068 goto msi_only;
1069
1070 for (i = 0; i < numvecs; i++)
1071 adapter->msix_entries[i].entry = i;
1072
1073 err = pci_enable_msix(adapter->pdev,
1074 adapter->msix_entries,
1075 numvecs);
1076 if (err == 0)
1077 goto out;
1078
1079 igb_reset_interrupt_capability(adapter);
1080
1081 /* If we can't do MSI-X, try MSI */
1082 msi_only:
1083 #ifdef CONFIG_PCI_IOV
1084 /* disable SR-IOV for non MSI-X configurations */
1085 if (adapter->vf_data) {
1086 struct e1000_hw *hw = &adapter->hw;
1087 /* disable iov and allow time for transactions to clear */
1088 pci_disable_sriov(adapter->pdev);
1089 msleep(500);
1090
1091 kfree(adapter->vf_data);
1092 adapter->vf_data = NULL;
1093 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1094 wrfl();
1095 msleep(100);
1096 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1097 }
1098 #endif
1099 adapter->vfs_allocated_count = 0;
1100 adapter->rss_queues = 1;
1101 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1102 adapter->num_rx_queues = 1;
1103 adapter->num_tx_queues = 1;
1104 adapter->num_q_vectors = 1;
1105 if (!pci_enable_msi(adapter->pdev))
1106 adapter->flags |= IGB_FLAG_HAS_MSI;
1107 out:
1108 /* Notify the stack of the (possibly) reduced queue counts. */
1109 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1110 return netif_set_real_num_rx_queues(adapter->netdev,
1111 adapter->num_rx_queues);
1112 }
1113
1114 /**
1115 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1116 * @adapter: board private structure to initialize
1117 *
1118 * We allocate one q_vector per queue interrupt. If allocation fails we
1119 * return -ENOMEM.
1120 **/
1121 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1122 {
1123 struct igb_q_vector *q_vector;
1124 struct e1000_hw *hw = &adapter->hw;
1125 int v_idx;
1126 int orig_node = adapter->node;
1127
1128 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1129 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1130 adapter->num_tx_queues)) &&
1131 (adapter->num_rx_queues == v_idx))
1132 adapter->node = orig_node;
1133 if (orig_node == -1) {
1134 int cur_node = next_online_node(adapter->node);
1135 if (cur_node == MAX_NUMNODES)
1136 cur_node = first_online_node;
1137 adapter->node = cur_node;
1138 }
1139 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1140 adapter->node);
1141 if (!q_vector)
1142 q_vector = kzalloc(sizeof(struct igb_q_vector),
1143 GFP_KERNEL);
1144 if (!q_vector)
1145 goto err_out;
1146 q_vector->adapter = adapter;
1147 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1148 q_vector->itr_val = IGB_START_ITR;
1149 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1150 adapter->q_vector[v_idx] = q_vector;
1151 }
1152 /* Restore the adapter's original node */
1153 adapter->node = orig_node;
1154
1155 return 0;
1156
1157 err_out:
1158 /* Restore the adapter's original node */
1159 adapter->node = orig_node;
1160 igb_free_q_vectors(adapter);
1161 return -ENOMEM;
1162 }
1163
1164 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1165 int ring_idx, int v_idx)
1166 {
1167 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1168
1169 q_vector->rx.ring = adapter->rx_ring[ring_idx];
1170 q_vector->rx.ring->q_vector = q_vector;
1171 q_vector->rx.count++;
1172 q_vector->itr_val = adapter->rx_itr_setting;
1173 if (q_vector->itr_val && q_vector->itr_val <= 3)
1174 q_vector->itr_val = IGB_START_ITR;
1175 }
1176
1177 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1178 int ring_idx, int v_idx)
1179 {
1180 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1181
1182 q_vector->tx.ring = adapter->tx_ring[ring_idx];
1183 q_vector->tx.ring->q_vector = q_vector;
1184 q_vector->tx.count++;
1185 q_vector->itr_val = adapter->tx_itr_setting;
1186 q_vector->tx.work_limit = adapter->tx_work_limit;
1187 if (q_vector->itr_val && q_vector->itr_val <= 3)
1188 q_vector->itr_val = IGB_START_ITR;
1189 }
1190
1191 /**
1192 * igb_map_ring_to_vector - maps allocated queues to vectors
1193 *
1194 * This function maps the recently allocated queues to vectors.
1195 **/
1196 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1197 {
1198 int i;
1199 int v_idx = 0;
1200
1201 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1202 (adapter->num_q_vectors < adapter->num_tx_queues))
1203 return -ENOMEM;
1204
1205 if (adapter->num_q_vectors >=
1206 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1207 for (i = 0; i < adapter->num_rx_queues; i++)
1208 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209 for (i = 0; i < adapter->num_tx_queues; i++)
1210 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1211 } else {
1212 for (i = 0; i < adapter->num_rx_queues; i++) {
1213 if (i < adapter->num_tx_queues)
1214 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1215 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1216 }
1217 for (; i < adapter->num_tx_queues; i++)
1218 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1219 }
1220 return 0;
1221 }
1222
1223 /**
1224 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1225 *
1226 * This function initializes the interrupts and allocates all of the queues.
1227 **/
1228 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1229 {
1230 struct pci_dev *pdev = adapter->pdev;
1231 int err;
1232
1233 err = igb_set_interrupt_capability(adapter);
1234 if (err)
1235 return err;
1236
1237 err = igb_alloc_q_vectors(adapter);
1238 if (err) {
1239 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1240 goto err_alloc_q_vectors;
1241 }
1242
1243 err = igb_alloc_queues(adapter);
1244 if (err) {
1245 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1246 goto err_alloc_queues;
1247 }
1248
1249 err = igb_map_ring_to_vector(adapter);
1250 if (err) {
1251 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1252 goto err_map_queues;
1253 }
1254
1255
1256 return 0;
1257 err_map_queues:
1258 igb_free_queues(adapter);
1259 err_alloc_queues:
1260 igb_free_q_vectors(adapter);
1261 err_alloc_q_vectors:
1262 igb_reset_interrupt_capability(adapter);
1263 return err;
1264 }
1265
1266 /**
1267 * igb_request_irq - initialize interrupts
1268 *
1269 * Attempts to configure interrupts using the best available
1270 * capabilities of the hardware and kernel.
1271 **/
1272 static int igb_request_irq(struct igb_adapter *adapter)
1273 {
1274 struct net_device *netdev = adapter->netdev;
1275 struct pci_dev *pdev = adapter->pdev;
1276 int err = 0;
1277
1278 if (adapter->msix_entries) {
1279 err = igb_request_msix(adapter);
1280 if (!err)
1281 goto request_done;
1282 /* fall back to MSI */
1283 igb_clear_interrupt_scheme(adapter);
1284 if (!pci_enable_msi(pdev))
1285 adapter->flags |= IGB_FLAG_HAS_MSI;
1286 igb_free_all_tx_resources(adapter);
1287 igb_free_all_rx_resources(adapter);
1288 adapter->num_tx_queues = 1;
1289 adapter->num_rx_queues = 1;
1290 adapter->num_q_vectors = 1;
1291 err = igb_alloc_q_vectors(adapter);
1292 if (err) {
1293 dev_err(&pdev->dev,
1294 "Unable to allocate memory for vectors\n");
1295 goto request_done;
1296 }
1297 err = igb_alloc_queues(adapter);
1298 if (err) {
1299 dev_err(&pdev->dev,
1300 "Unable to allocate memory for queues\n");
1301 igb_free_q_vectors(adapter);
1302 goto request_done;
1303 }
1304 igb_setup_all_tx_resources(adapter);
1305 igb_setup_all_rx_resources(adapter);
1306 }
1307
1308 igb_assign_vector(adapter->q_vector[0], 0);
1309
1310 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1311 err = request_irq(pdev->irq, igb_intr_msi, 0,
1312 netdev->name, adapter);
1313 if (!err)
1314 goto request_done;
1315
1316 /* fall back to legacy interrupts */
1317 igb_reset_interrupt_capability(adapter);
1318 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1319 }
1320
1321 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1322 netdev->name, adapter);
1323
1324 if (err)
1325 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1326 err);
1327
1328 request_done:
1329 return err;
1330 }
1331
1332 static void igb_free_irq(struct igb_adapter *adapter)
1333 {
1334 if (adapter->msix_entries) {
1335 int vector = 0, i;
1336
1337 free_irq(adapter->msix_entries[vector++].vector, adapter);
1338
1339 for (i = 0; i < adapter->num_q_vectors; i++)
1340 free_irq(adapter->msix_entries[vector++].vector,
1341 adapter->q_vector[i]);
1342 } else {
1343 free_irq(adapter->pdev->irq, adapter);
1344 }
1345 }
1346
1347 /**
1348 * igb_irq_disable - Mask off interrupt generation on the NIC
1349 * @adapter: board private structure
1350 **/
1351 static void igb_irq_disable(struct igb_adapter *adapter)
1352 {
1353 struct e1000_hw *hw = &adapter->hw;
1354
1355 /*
1356 * we need to be careful when disabling interrupts. The VFs are also
1357 * mapped into these registers and so clearing the bits can cause
1358 * issues on the VF drivers so we only need to clear what we set
1359 */
1360 if (adapter->msix_entries) {
1361 u32 regval = rd32(E1000_EIAM);
1362 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1363 wr32(E1000_EIMC, adapter->eims_enable_mask);
1364 regval = rd32(E1000_EIAC);
1365 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1366 }
1367
1368 wr32(E1000_IAM, 0);
1369 wr32(E1000_IMC, ~0);
1370 wrfl();
1371 if (adapter->msix_entries) {
1372 int i;
1373 for (i = 0; i < adapter->num_q_vectors; i++)
1374 synchronize_irq(adapter->msix_entries[i].vector);
1375 } else {
1376 synchronize_irq(adapter->pdev->irq);
1377 }
1378 }
1379
1380 /**
1381 * igb_irq_enable - Enable default interrupt generation settings
1382 * @adapter: board private structure
1383 **/
1384 static void igb_irq_enable(struct igb_adapter *adapter)
1385 {
1386 struct e1000_hw *hw = &adapter->hw;
1387
1388 if (adapter->msix_entries) {
1389 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1390 u32 regval = rd32(E1000_EIAC);
1391 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1392 regval = rd32(E1000_EIAM);
1393 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1394 wr32(E1000_EIMS, adapter->eims_enable_mask);
1395 if (adapter->vfs_allocated_count) {
1396 wr32(E1000_MBVFIMR, 0xFF);
1397 ims |= E1000_IMS_VMMB;
1398 }
1399 wr32(E1000_IMS, ims);
1400 } else {
1401 wr32(E1000_IMS, IMS_ENABLE_MASK |
1402 E1000_IMS_DRSTA);
1403 wr32(E1000_IAM, IMS_ENABLE_MASK |
1404 E1000_IMS_DRSTA);
1405 }
1406 }
1407
1408 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1409 {
1410 struct e1000_hw *hw = &adapter->hw;
1411 u16 vid = adapter->hw.mng_cookie.vlan_id;
1412 u16 old_vid = adapter->mng_vlan_id;
1413
1414 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1415 /* add VID to filter table */
1416 igb_vfta_set(hw, vid, true);
1417 adapter->mng_vlan_id = vid;
1418 } else {
1419 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1420 }
1421
1422 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1423 (vid != old_vid) &&
1424 !test_bit(old_vid, adapter->active_vlans)) {
1425 /* remove VID from filter table */
1426 igb_vfta_set(hw, old_vid, false);
1427 }
1428 }
1429
1430 /**
1431 * igb_release_hw_control - release control of the h/w to f/w
1432 * @adapter: address of board private structure
1433 *
1434 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1435 * For ASF and Pass Through versions of f/w this means that the
1436 * driver is no longer loaded.
1437 *
1438 **/
1439 static void igb_release_hw_control(struct igb_adapter *adapter)
1440 {
1441 struct e1000_hw *hw = &adapter->hw;
1442 u32 ctrl_ext;
1443
1444 /* Let firmware take over control of h/w */
1445 ctrl_ext = rd32(E1000_CTRL_EXT);
1446 wr32(E1000_CTRL_EXT,
1447 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1448 }
1449
1450 /**
1451 * igb_get_hw_control - get control of the h/w from f/w
1452 * @adapter: address of board private structure
1453 *
1454 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1455 * For ASF and Pass Through versions of f/w this means that
1456 * the driver is loaded.
1457 *
1458 **/
1459 static void igb_get_hw_control(struct igb_adapter *adapter)
1460 {
1461 struct e1000_hw *hw = &adapter->hw;
1462 u32 ctrl_ext;
1463
1464 /* Let firmware know the driver has taken over */
1465 ctrl_ext = rd32(E1000_CTRL_EXT);
1466 wr32(E1000_CTRL_EXT,
1467 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1468 }
1469
1470 /**
1471 * igb_configure - configure the hardware for RX and TX
1472 * @adapter: private board structure
1473 **/
1474 static void igb_configure(struct igb_adapter *adapter)
1475 {
1476 struct net_device *netdev = adapter->netdev;
1477 int i;
1478
1479 igb_get_hw_control(adapter);
1480 igb_set_rx_mode(netdev);
1481
1482 igb_restore_vlan(adapter);
1483
1484 igb_setup_tctl(adapter);
1485 igb_setup_mrqc(adapter);
1486 igb_setup_rctl(adapter);
1487
1488 igb_configure_tx(adapter);
1489 igb_configure_rx(adapter);
1490
1491 igb_rx_fifo_flush_82575(&adapter->hw);
1492
1493 /* call igb_desc_unused which always leaves
1494 * at least 1 descriptor unused to make sure
1495 * next_to_use != next_to_clean */
1496 for (i = 0; i < adapter->num_rx_queues; i++) {
1497 struct igb_ring *ring = adapter->rx_ring[i];
1498 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1499 }
1500 }
1501
1502 /**
1503 * igb_power_up_link - Power up the phy/serdes link
1504 * @adapter: address of board private structure
1505 **/
1506 void igb_power_up_link(struct igb_adapter *adapter)
1507 {
1508 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1509 igb_power_up_phy_copper(&adapter->hw);
1510 else
1511 igb_power_up_serdes_link_82575(&adapter->hw);
1512 igb_reset_phy(&adapter->hw);
1513 }
1514
1515 /**
1516 * igb_power_down_link - Power down the phy/serdes link
1517 * @adapter: address of board private structure
1518 */
1519 static void igb_power_down_link(struct igb_adapter *adapter)
1520 {
1521 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1522 igb_power_down_phy_copper_82575(&adapter->hw);
1523 else
1524 igb_shutdown_serdes_link_82575(&adapter->hw);
1525 }
1526
1527 /**
1528 * igb_up - Open the interface and prepare it to handle traffic
1529 * @adapter: board private structure
1530 **/
1531 int igb_up(struct igb_adapter *adapter)
1532 {
1533 struct e1000_hw *hw = &adapter->hw;
1534 int i;
1535
1536 /* hardware has been reset, we need to reload some things */
1537 igb_configure(adapter);
1538
1539 clear_bit(__IGB_DOWN, &adapter->state);
1540
1541 for (i = 0; i < adapter->num_q_vectors; i++)
1542 napi_enable(&(adapter->q_vector[i]->napi));
1543
1544 if (adapter->msix_entries)
1545 igb_configure_msix(adapter);
1546 else
1547 igb_assign_vector(adapter->q_vector[0], 0);
1548
1549 /* Clear any pending interrupts. */
1550 rd32(E1000_ICR);
1551 igb_irq_enable(adapter);
1552
1553 /* notify VFs that reset has been completed */
1554 if (adapter->vfs_allocated_count) {
1555 u32 reg_data = rd32(E1000_CTRL_EXT);
1556 reg_data |= E1000_CTRL_EXT_PFRSTD;
1557 wr32(E1000_CTRL_EXT, reg_data);
1558 }
1559
1560 netif_tx_start_all_queues(adapter->netdev);
1561
1562 /* start the watchdog. */
1563 hw->mac.get_link_status = 1;
1564 schedule_work(&adapter->watchdog_task);
1565
1566 return 0;
1567 }
1568
1569 void igb_down(struct igb_adapter *adapter)
1570 {
1571 struct net_device *netdev = adapter->netdev;
1572 struct e1000_hw *hw = &adapter->hw;
1573 u32 tctl, rctl;
1574 int i;
1575
1576 /* signal that we're down so the interrupt handler does not
1577 * reschedule our watchdog timer */
1578 set_bit(__IGB_DOWN, &adapter->state);
1579
1580 /* disable receives in the hardware */
1581 rctl = rd32(E1000_RCTL);
1582 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1583 /* flush and sleep below */
1584
1585 netif_tx_stop_all_queues(netdev);
1586
1587 /* disable transmits in the hardware */
1588 tctl = rd32(E1000_TCTL);
1589 tctl &= ~E1000_TCTL_EN;
1590 wr32(E1000_TCTL, tctl);
1591 /* flush both disables and wait for them to finish */
1592 wrfl();
1593 msleep(10);
1594
1595 for (i = 0; i < adapter->num_q_vectors; i++)
1596 napi_disable(&(adapter->q_vector[i]->napi));
1597
1598 igb_irq_disable(adapter);
1599
1600 del_timer_sync(&adapter->watchdog_timer);
1601 del_timer_sync(&adapter->phy_info_timer);
1602
1603 netif_carrier_off(netdev);
1604
1605 /* record the stats before reset*/
1606 spin_lock(&adapter->stats64_lock);
1607 igb_update_stats(adapter, &adapter->stats64);
1608 spin_unlock(&adapter->stats64_lock);
1609
1610 adapter->link_speed = 0;
1611 adapter->link_duplex = 0;
1612
1613 if (!pci_channel_offline(adapter->pdev))
1614 igb_reset(adapter);
1615 igb_clean_all_tx_rings(adapter);
1616 igb_clean_all_rx_rings(adapter);
1617 #ifdef CONFIG_IGB_DCA
1618
1619 /* since we reset the hardware DCA settings were cleared */
1620 igb_setup_dca(adapter);
1621 #endif
1622 }
1623
1624 void igb_reinit_locked(struct igb_adapter *adapter)
1625 {
1626 WARN_ON(in_interrupt());
1627 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1628 msleep(1);
1629 igb_down(adapter);
1630 igb_up(adapter);
1631 clear_bit(__IGB_RESETTING, &adapter->state);
1632 }
1633
1634 void igb_reset(struct igb_adapter *adapter)
1635 {
1636 struct pci_dev *pdev = adapter->pdev;
1637 struct e1000_hw *hw = &adapter->hw;
1638 struct e1000_mac_info *mac = &hw->mac;
1639 struct e1000_fc_info *fc = &hw->fc;
1640 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1641 u16 hwm;
1642
1643 /* Repartition Pba for greater than 9k mtu
1644 * To take effect CTRL.RST is required.
1645 */
1646 switch (mac->type) {
1647 case e1000_i350:
1648 case e1000_82580:
1649 pba = rd32(E1000_RXPBS);
1650 pba = igb_rxpbs_adjust_82580(pba);
1651 break;
1652 case e1000_82576:
1653 pba = rd32(E1000_RXPBS);
1654 pba &= E1000_RXPBS_SIZE_MASK_82576;
1655 break;
1656 case e1000_82575:
1657 default:
1658 pba = E1000_PBA_34K;
1659 break;
1660 }
1661
1662 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1663 (mac->type < e1000_82576)) {
1664 /* adjust PBA for jumbo frames */
1665 wr32(E1000_PBA, pba);
1666
1667 /* To maintain wire speed transmits, the Tx FIFO should be
1668 * large enough to accommodate two full transmit packets,
1669 * rounded up to the next 1KB and expressed in KB. Likewise,
1670 * the Rx FIFO should be large enough to accommodate at least
1671 * one full receive packet and is similarly rounded up and
1672 * expressed in KB. */
1673 pba = rd32(E1000_PBA);
1674 /* upper 16 bits has Tx packet buffer allocation size in KB */
1675 tx_space = pba >> 16;
1676 /* lower 16 bits has Rx packet buffer allocation size in KB */
1677 pba &= 0xffff;
1678 /* the tx fifo also stores 16 bytes of information about the tx
1679 * but don't include ethernet FCS because hardware appends it */
1680 min_tx_space = (adapter->max_frame_size +
1681 sizeof(union e1000_adv_tx_desc) -
1682 ETH_FCS_LEN) * 2;
1683 min_tx_space = ALIGN(min_tx_space, 1024);
1684 min_tx_space >>= 10;
1685 /* software strips receive CRC, so leave room for it */
1686 min_rx_space = adapter->max_frame_size;
1687 min_rx_space = ALIGN(min_rx_space, 1024);
1688 min_rx_space >>= 10;
1689
1690 /* If current Tx allocation is less than the min Tx FIFO size,
1691 * and the min Tx FIFO size is less than the current Rx FIFO
1692 * allocation, take space away from current Rx allocation */
1693 if (tx_space < min_tx_space &&
1694 ((min_tx_space - tx_space) < pba)) {
1695 pba = pba - (min_tx_space - tx_space);
1696
1697 /* if short on rx space, rx wins and must trump tx
1698 * adjustment */
1699 if (pba < min_rx_space)
1700 pba = min_rx_space;
1701 }
1702 wr32(E1000_PBA, pba);
1703 }
1704
1705 /* flow control settings */
1706 /* The high water mark must be low enough to fit one full frame
1707 * (or the size used for early receive) above it in the Rx FIFO.
1708 * Set it to the lower of:
1709 * - 90% of the Rx FIFO size, or
1710 * - the full Rx FIFO size minus one full frame */
1711 hwm = min(((pba << 10) * 9 / 10),
1712 ((pba << 10) - 2 * adapter->max_frame_size));
1713
1714 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1715 fc->low_water = fc->high_water - 16;
1716 fc->pause_time = 0xFFFF;
1717 fc->send_xon = 1;
1718 fc->current_mode = fc->requested_mode;
1719
1720 /* disable receive for all VFs and wait one second */
1721 if (adapter->vfs_allocated_count) {
1722 int i;
1723 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1724 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1725
1726 /* ping all the active vfs to let them know we are going down */
1727 igb_ping_all_vfs(adapter);
1728
1729 /* disable transmits and receives */
1730 wr32(E1000_VFRE, 0);
1731 wr32(E1000_VFTE, 0);
1732 }
1733
1734 /* Allow time for pending master requests to run */
1735 hw->mac.ops.reset_hw(hw);
1736 wr32(E1000_WUC, 0);
1737
1738 if (hw->mac.ops.init_hw(hw))
1739 dev_err(&pdev->dev, "Hardware Error\n");
1740
1741 igb_init_dmac(adapter, pba);
1742 if (!netif_running(adapter->netdev))
1743 igb_power_down_link(adapter);
1744
1745 igb_update_mng_vlan(adapter);
1746
1747 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1749
1750 igb_get_phy_info(hw);
1751 }
1752
1753 static netdev_features_t igb_fix_features(struct net_device *netdev,
1754 netdev_features_t features)
1755 {
1756 /*
1757 * Since there is no support for separate rx/tx vlan accel
1758 * enable/disable make sure tx flag is always in same state as rx.
1759 */
1760 if (features & NETIF_F_HW_VLAN_RX)
1761 features |= NETIF_F_HW_VLAN_TX;
1762 else
1763 features &= ~NETIF_F_HW_VLAN_TX;
1764
1765 return features;
1766 }
1767
1768 static int igb_set_features(struct net_device *netdev,
1769 netdev_features_t features)
1770 {
1771 netdev_features_t changed = netdev->features ^ features;
1772
1773 if (changed & NETIF_F_HW_VLAN_RX)
1774 igb_vlan_mode(netdev, features);
1775
1776 return 0;
1777 }
1778
1779 static const struct net_device_ops igb_netdev_ops = {
1780 .ndo_open = igb_open,
1781 .ndo_stop = igb_close,
1782 .ndo_start_xmit = igb_xmit_frame,
1783 .ndo_get_stats64 = igb_get_stats64,
1784 .ndo_set_rx_mode = igb_set_rx_mode,
1785 .ndo_set_mac_address = igb_set_mac,
1786 .ndo_change_mtu = igb_change_mtu,
1787 .ndo_do_ioctl = igb_ioctl,
1788 .ndo_tx_timeout = igb_tx_timeout,
1789 .ndo_validate_addr = eth_validate_addr,
1790 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1791 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1792 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1793 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1794 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1795 .ndo_get_vf_config = igb_ndo_get_vf_config,
1796 #ifdef CONFIG_NET_POLL_CONTROLLER
1797 .ndo_poll_controller = igb_netpoll,
1798 #endif
1799 .ndo_fix_features = igb_fix_features,
1800 .ndo_set_features = igb_set_features,
1801 };
1802
1803 /**
1804 * igb_probe - Device Initialization Routine
1805 * @pdev: PCI device information struct
1806 * @ent: entry in igb_pci_tbl
1807 *
1808 * Returns 0 on success, negative on failure
1809 *
1810 * igb_probe initializes an adapter identified by a pci_dev structure.
1811 * The OS initialization, configuring of the adapter private structure,
1812 * and a hardware reset occur.
1813 **/
1814 static int __devinit igb_probe(struct pci_dev *pdev,
1815 const struct pci_device_id *ent)
1816 {
1817 struct net_device *netdev;
1818 struct igb_adapter *adapter;
1819 struct e1000_hw *hw;
1820 u16 eeprom_data = 0;
1821 s32 ret_val;
1822 static int global_quad_port_a; /* global quad port a indication */
1823 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1824 unsigned long mmio_start, mmio_len;
1825 int err, pci_using_dac;
1826 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1827 u8 part_str[E1000_PBANUM_LENGTH];
1828
1829 /* Catch broken hardware that put the wrong VF device ID in
1830 * the PCIe SR-IOV capability.
1831 */
1832 if (pdev->is_virtfn) {
1833 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1834 pci_name(pdev), pdev->vendor, pdev->device);
1835 return -EINVAL;
1836 }
1837
1838 err = pci_enable_device_mem(pdev);
1839 if (err)
1840 return err;
1841
1842 pci_using_dac = 0;
1843 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1844 if (!err) {
1845 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1846 if (!err)
1847 pci_using_dac = 1;
1848 } else {
1849 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1850 if (err) {
1851 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1852 if (err) {
1853 dev_err(&pdev->dev, "No usable DMA "
1854 "configuration, aborting\n");
1855 goto err_dma;
1856 }
1857 }
1858 }
1859
1860 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1861 IORESOURCE_MEM),
1862 igb_driver_name);
1863 if (err)
1864 goto err_pci_reg;
1865
1866 pci_enable_pcie_error_reporting(pdev);
1867
1868 pci_set_master(pdev);
1869 pci_save_state(pdev);
1870
1871 err = -ENOMEM;
1872 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1873 IGB_MAX_TX_QUEUES);
1874 if (!netdev)
1875 goto err_alloc_etherdev;
1876
1877 SET_NETDEV_DEV(netdev, &pdev->dev);
1878
1879 pci_set_drvdata(pdev, netdev);
1880 adapter = netdev_priv(netdev);
1881 adapter->netdev = netdev;
1882 adapter->pdev = pdev;
1883 hw = &adapter->hw;
1884 hw->back = adapter;
1885 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1886
1887 mmio_start = pci_resource_start(pdev, 0);
1888 mmio_len = pci_resource_len(pdev, 0);
1889
1890 err = -EIO;
1891 hw->hw_addr = ioremap(mmio_start, mmio_len);
1892 if (!hw->hw_addr)
1893 goto err_ioremap;
1894
1895 netdev->netdev_ops = &igb_netdev_ops;
1896 igb_set_ethtool_ops(netdev);
1897 netdev->watchdog_timeo = 5 * HZ;
1898
1899 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1900
1901 netdev->mem_start = mmio_start;
1902 netdev->mem_end = mmio_start + mmio_len;
1903
1904 /* PCI config space info */
1905 hw->vendor_id = pdev->vendor;
1906 hw->device_id = pdev->device;
1907 hw->revision_id = pdev->revision;
1908 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1909 hw->subsystem_device_id = pdev->subsystem_device;
1910
1911 /* Copy the default MAC, PHY and NVM function pointers */
1912 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1913 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1914 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1915 /* Initialize skew-specific constants */
1916 err = ei->get_invariants(hw);
1917 if (err)
1918 goto err_sw_init;
1919
1920 /* setup the private structure */
1921 err = igb_sw_init(adapter);
1922 if (err)
1923 goto err_sw_init;
1924
1925 igb_get_bus_info_pcie(hw);
1926
1927 hw->phy.autoneg_wait_to_complete = false;
1928
1929 /* Copper options */
1930 if (hw->phy.media_type == e1000_media_type_copper) {
1931 hw->phy.mdix = AUTO_ALL_MODES;
1932 hw->phy.disable_polarity_correction = false;
1933 hw->phy.ms_type = e1000_ms_hw_default;
1934 }
1935
1936 if (igb_check_reset_block(hw))
1937 dev_info(&pdev->dev,
1938 "PHY reset is blocked due to SOL/IDER session.\n");
1939
1940 /*
1941 * features is initialized to 0 in allocation, it might have bits
1942 * set by igb_sw_init so we should use an or instead of an
1943 * assignment.
1944 */
1945 netdev->features |= NETIF_F_SG |
1946 NETIF_F_IP_CSUM |
1947 NETIF_F_IPV6_CSUM |
1948 NETIF_F_TSO |
1949 NETIF_F_TSO6 |
1950 NETIF_F_RXHASH |
1951 NETIF_F_RXCSUM |
1952 NETIF_F_HW_VLAN_RX |
1953 NETIF_F_HW_VLAN_TX;
1954
1955 /* copy netdev features into list of user selectable features */
1956 netdev->hw_features |= netdev->features;
1957
1958 /* set this bit last since it cannot be part of hw_features */
1959 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1960
1961 netdev->vlan_features |= NETIF_F_TSO |
1962 NETIF_F_TSO6 |
1963 NETIF_F_IP_CSUM |
1964 NETIF_F_IPV6_CSUM |
1965 NETIF_F_SG;
1966
1967 if (pci_using_dac) {
1968 netdev->features |= NETIF_F_HIGHDMA;
1969 netdev->vlan_features |= NETIF_F_HIGHDMA;
1970 }
1971
1972 if (hw->mac.type >= e1000_82576) {
1973 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1974 netdev->features |= NETIF_F_SCTP_CSUM;
1975 }
1976
1977 netdev->priv_flags |= IFF_UNICAST_FLT;
1978
1979 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1980
1981 /* before reading the NVM, reset the controller to put the device in a
1982 * known good starting state */
1983 hw->mac.ops.reset_hw(hw);
1984
1985 /* make sure the NVM is good */
1986 if (hw->nvm.ops.validate(hw) < 0) {
1987 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1988 err = -EIO;
1989 goto err_eeprom;
1990 }
1991
1992 /* copy the MAC address out of the NVM */
1993 if (hw->mac.ops.read_mac_addr(hw))
1994 dev_err(&pdev->dev, "NVM Read Error\n");
1995
1996 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1997 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1998
1999 if (!is_valid_ether_addr(netdev->perm_addr)) {
2000 dev_err(&pdev->dev, "Invalid MAC Address\n");
2001 err = -EIO;
2002 goto err_eeprom;
2003 }
2004
2005 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2006 (unsigned long) adapter);
2007 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2008 (unsigned long) adapter);
2009
2010 INIT_WORK(&adapter->reset_task, igb_reset_task);
2011 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2012
2013 /* Initialize link properties that are user-changeable */
2014 adapter->fc_autoneg = true;
2015 hw->mac.autoneg = true;
2016 hw->phy.autoneg_advertised = 0x2f;
2017
2018 hw->fc.requested_mode = e1000_fc_default;
2019 hw->fc.current_mode = e1000_fc_default;
2020
2021 igb_validate_mdi_setting(hw);
2022
2023 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2024 * enable the ACPI Magic Packet filter
2025 */
2026
2027 if (hw->bus.func == 0)
2028 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2029 else if (hw->mac.type >= e1000_82580)
2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2031 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2032 &eeprom_data);
2033 else if (hw->bus.func == 1)
2034 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2035
2036 if (eeprom_data & eeprom_apme_mask)
2037 adapter->eeprom_wol |= E1000_WUFC_MAG;
2038
2039 /* now that we have the eeprom settings, apply the special cases where
2040 * the eeprom may be wrong or the board simply won't support wake on
2041 * lan on a particular port */
2042 switch (pdev->device) {
2043 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2044 adapter->eeprom_wol = 0;
2045 break;
2046 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2047 case E1000_DEV_ID_82576_FIBER:
2048 case E1000_DEV_ID_82576_SERDES:
2049 /* Wake events only supported on port A for dual fiber
2050 * regardless of eeprom setting */
2051 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2052 adapter->eeprom_wol = 0;
2053 break;
2054 case E1000_DEV_ID_82576_QUAD_COPPER:
2055 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2056 /* if quad port adapter, disable WoL on all but port A */
2057 if (global_quad_port_a != 0)
2058 adapter->eeprom_wol = 0;
2059 else
2060 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2061 /* Reset for multiple quad port adapters */
2062 if (++global_quad_port_a == 4)
2063 global_quad_port_a = 0;
2064 break;
2065 }
2066
2067 /* initialize the wol settings based on the eeprom settings */
2068 adapter->wol = adapter->eeprom_wol;
2069 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2070
2071 /* reset the hardware with the new settings */
2072 igb_reset(adapter);
2073
2074 /* let the f/w know that the h/w is now under the control of the
2075 * driver. */
2076 igb_get_hw_control(adapter);
2077
2078 strcpy(netdev->name, "eth%d");
2079 err = register_netdev(netdev);
2080 if (err)
2081 goto err_register;
2082
2083 /* carrier off reporting is important to ethtool even BEFORE open */
2084 netif_carrier_off(netdev);
2085
2086 #ifdef CONFIG_IGB_DCA
2087 if (dca_add_requester(&pdev->dev) == 0) {
2088 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2089 dev_info(&pdev->dev, "DCA enabled\n");
2090 igb_setup_dca(adapter);
2091 }
2092
2093 #endif
2094 /* do hw tstamp init after resetting */
2095 igb_init_hw_timer(adapter);
2096
2097 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2098 /* print bus type/speed/width info */
2099 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2100 netdev->name,
2101 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2102 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2103 "unknown"),
2104 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2105 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2106 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2107 "unknown"),
2108 netdev->dev_addr);
2109
2110 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2111 if (ret_val)
2112 strcpy(part_str, "Unknown");
2113 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2114 dev_info(&pdev->dev,
2115 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2116 adapter->msix_entries ? "MSI-X" :
2117 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2118 adapter->num_rx_queues, adapter->num_tx_queues);
2119 switch (hw->mac.type) {
2120 case e1000_i350:
2121 igb_set_eee_i350(hw);
2122 break;
2123 default:
2124 break;
2125 }
2126
2127 pm_runtime_put_noidle(&pdev->dev);
2128 return 0;
2129
2130 err_register:
2131 igb_release_hw_control(adapter);
2132 err_eeprom:
2133 if (!igb_check_reset_block(hw))
2134 igb_reset_phy(hw);
2135
2136 if (hw->flash_address)
2137 iounmap(hw->flash_address);
2138 err_sw_init:
2139 igb_clear_interrupt_scheme(adapter);
2140 iounmap(hw->hw_addr);
2141 err_ioremap:
2142 free_netdev(netdev);
2143 err_alloc_etherdev:
2144 pci_release_selected_regions(pdev,
2145 pci_select_bars(pdev, IORESOURCE_MEM));
2146 err_pci_reg:
2147 err_dma:
2148 pci_disable_device(pdev);
2149 return err;
2150 }
2151
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 *
 * Teardown order: stop timers and deferred work, drop DCA, hand the
 * hardware back to firmware, unregister the netdev, release interrupt
 * resources, tear down SR-IOV state, unmap the register/flash windows,
 * and finally free the netdev and disable the PCI device.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* NOTE(review): looks like the counterpart of the
	 * pm_runtime_put_noidle() issued at the end of probe - confirm.
	 */
	pm_runtime_get_noresume(&pdev->dev);

	/*
	 * The watchdog timer may be rescheduled, so explicitly
	 * disable watchdog from being rescheduled.
	 */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	/* wait for any reset/watchdog work that is already running */
	cancel_work_sync(&adapter->reset_task);
	cancel_work_sync(&adapter->watchdog_task);

#ifdef CONFIG_IGB_DCA
	/* undo the DCA registration performed in probe, if it succeeded */
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
		if (!igb_check_vf_assignment(adapter)) {
			pci_disable_sriov(pdev);
			msleep(500);
		} else {
			/* SR-IOV is left enabled in this case */
			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
		}

		/* the per-VF bookkeeping is freed in both cases */
		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");
	}
#endif

	/* no register access is possible past this point */
	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev,
				     pci_select_bars(pdev, IORESOURCE_MEM));

	/* shadow_vfta is allocated in igb_sw_init() */
	kfree(adapter->shadow_vfta);
	/* adapter lives inside netdev's private area; do not touch it below */
	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}
2230
2231 /**
2232 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2233 * @adapter: board private structure to initialize
2234 *
2235 * This function initializes the vf specific data storage and then attempts to
2236 * allocate the VFs. The reason for ordering it this way is because it is much
2237 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2238 * the memory for the VFs.
2239 **/
2240 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2241 {
2242 #ifdef CONFIG_PCI_IOV
2243 struct pci_dev *pdev = adapter->pdev;
2244 int old_vfs = igb_find_enabled_vfs(adapter);
2245 int i;
2246
2247 if (old_vfs) {
2248 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2249 "max_vfs setting of %d\n", old_vfs, max_vfs);
2250 adapter->vfs_allocated_count = old_vfs;
2251 }
2252
2253 if (!adapter->vfs_allocated_count)
2254 return;
2255
2256 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2257 sizeof(struct vf_data_storage), GFP_KERNEL);
2258 /* if allocation failed then we do not support SR-IOV */
2259 if (!adapter->vf_data) {
2260 adapter->vfs_allocated_count = 0;
2261 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2262 "Data Storage\n");
2263 goto out;
2264 }
2265
2266 if (!old_vfs) {
2267 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2268 goto err_out;
2269 }
2270 dev_info(&pdev->dev, "%d VFs allocated\n",
2271 adapter->vfs_allocated_count);
2272 for (i = 0; i < adapter->vfs_allocated_count; i++)
2273 igb_vf_configure(adapter, i);
2274
2275 /* DMA Coalescing is not supported in IOV mode. */
2276 adapter->flags &= ~IGB_FLAG_DMAC;
2277 goto out;
2278 err_out:
2279 kfree(adapter->vf_data);
2280 adapter->vf_data = NULL;
2281 adapter->vfs_allocated_count = 0;
2282 out:
2283 return;
2284 #endif /* CONFIG_PCI_IOV */
2285 }
2286
2287 /**
2288 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2289 * @adapter: board private structure to initialize
2290 *
2291 * igb_init_hw_timer initializes the function pointer and values for the hw
2292 * timer found in hardware.
2293 **/
2294 static void igb_init_hw_timer(struct igb_adapter *adapter)
2295 {
2296 struct e1000_hw *hw = &adapter->hw;
2297
2298 switch (hw->mac.type) {
2299 case e1000_i350:
2300 case e1000_82580:
2301 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2302 adapter->cycles.read = igb_read_clock;
2303 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2304 adapter->cycles.mult = 1;
2305 /*
2306 * The 82580 timesync updates the system timer every 8ns by 8ns
2307 * and the value cannot be shifted. Instead we need to shift
2308 * the registers to generate a 64bit timer value. As a result
2309 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2310 * 24 in order to generate a larger value for synchronization.
2311 */
2312 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2313 /* disable system timer temporarily by setting bit 31 */
2314 wr32(E1000_TSAUXC, 0x80000000);
2315 wrfl();
2316
2317 /* Set registers so that rollover occurs soon to test this. */
2318 wr32(E1000_SYSTIMR, 0x00000000);
2319 wr32(E1000_SYSTIML, 0x80000000);
2320 wr32(E1000_SYSTIMH, 0x000000FF);
2321 wrfl();
2322
2323 /* enable system timer by clearing bit 31 */
2324 wr32(E1000_TSAUXC, 0x0);
2325 wrfl();
2326
2327 timecounter_init(&adapter->clock,
2328 &adapter->cycles,
2329 ktime_to_ns(ktime_get_real()));
2330 /*
2331 * Synchronize our NIC clock against system wall clock. NIC
2332 * time stamp reading requires ~3us per sample, each sample
2333 * was pretty stable even under load => only require 10
2334 * samples for each offset comparison.
2335 */
2336 memset(&adapter->compare, 0, sizeof(adapter->compare));
2337 adapter->compare.source = &adapter->clock;
2338 adapter->compare.target = ktime_get_real;
2339 adapter->compare.num_samples = 10;
2340 timecompare_update(&adapter->compare, 0);
2341 break;
2342 case e1000_82576:
2343 /*
2344 * Initialize hardware timer: we keep it running just in case
2345 * that some program needs it later on.
2346 */
2347 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2348 adapter->cycles.read = igb_read_clock;
2349 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2350 adapter->cycles.mult = 1;
2351 /**
2352 * Scale the NIC clock cycle by a large factor so that
2353 * relatively small clock corrections can be added or
2354 * subtracted at each clock tick. The drawbacks of a large
2355 * factor are a) that the clock register overflows more quickly
2356 * (not such a big deal) and b) that the increment per tick has
2357 * to fit into 24 bits. As a result we need to use a shift of
2358 * 19 so we can fit a value of 16 into the TIMINCA register.
2359 */
2360 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2361 wr32(E1000_TIMINCA,
2362 (1 << E1000_TIMINCA_16NS_SHIFT) |
2363 (16 << IGB_82576_TSYNC_SHIFT));
2364
2365 /* Set registers so that rollover occurs soon to test this. */
2366 wr32(E1000_SYSTIML, 0x00000000);
2367 wr32(E1000_SYSTIMH, 0xFF800000);
2368 wrfl();
2369
2370 timecounter_init(&adapter->clock,
2371 &adapter->cycles,
2372 ktime_to_ns(ktime_get_real()));
2373 /*
2374 * Synchronize our NIC clock against system wall clock. NIC
2375 * time stamp reading requires ~3us per sample, each sample
2376 * was pretty stable even under load => only require 10
2377 * samples for each offset comparison.
2378 */
2379 memset(&adapter->compare, 0, sizeof(adapter->compare));
2380 adapter->compare.source = &adapter->clock;
2381 adapter->compare.target = ktime_get_real;
2382 adapter->compare.num_samples = 10;
2383 timecompare_update(&adapter->compare, 0);
2384 break;
2385 case e1000_82575:
2386 /* 82575 does not support timesync */
2387 default:
2388 break;
2389 }
2390
2391 }
2392
2393 /**
2394 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2395 * @adapter: board private structure to initialize
2396 *
2397 * igb_sw_init initializes the Adapter private data structure.
2398 * Fields are initialized based on PCI device information and
2399 * OS network device settings (MTU size).
2400 **/
2401 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2402 {
2403 struct e1000_hw *hw = &adapter->hw;
2404 struct net_device *netdev = adapter->netdev;
2405 struct pci_dev *pdev = adapter->pdev;
2406
2407 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2408
2409 /* set default ring sizes */
2410 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2411 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2412
2413 /* set default ITR values */
2414 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2415 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2416
2417 /* set default work limits */
2418 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2419
2420 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2421 VLAN_HLEN;
2422 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2423
2424 adapter->node = -1;
2425
2426 spin_lock_init(&adapter->stats64_lock);
2427 #ifdef CONFIG_PCI_IOV
2428 switch (hw->mac.type) {
2429 case e1000_82576:
2430 case e1000_i350:
2431 if (max_vfs > 7) {
2432 dev_warn(&pdev->dev,
2433 "Maximum of 7 VFs per PF, using max\n");
2434 adapter->vfs_allocated_count = 7;
2435 } else
2436 adapter->vfs_allocated_count = max_vfs;
2437 break;
2438 default:
2439 break;
2440 }
2441 #endif /* CONFIG_PCI_IOV */
2442 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2443 /* i350 cannot do RSS and SR-IOV at the same time */
2444 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2445 adapter->rss_queues = 1;
2446
2447 /*
2448 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2449 * then we should combine the queues into a queue pair in order to
2450 * conserve interrupts due to limited supply
2451 */
2452 if ((adapter->rss_queues > 4) ||
2453 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2454 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2455
2456 /* Setup and initialize a copy of the hw vlan table array */
2457 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2458 E1000_VLAN_FILTER_TBL_SIZE,
2459 GFP_ATOMIC);
2460
2461 /* This call may decrease the number of queues */
2462 if (igb_init_interrupt_scheme(adapter)) {
2463 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2464 return -ENOMEM;
2465 }
2466
2467 igb_probe_vfs(adapter);
2468
2469 /* Explicitly disable IRQ since the NIC can be in any state. */
2470 igb_irq_disable(adapter);
2471
2472 if (hw->mac.type == e1000_i350)
2473 adapter->flags &= ~IGB_FLAG_DMAC;
2474
2475 set_bit(__IGB_DOWN, &adapter->state);
2476 return 0;
2477 }
2478
2479 /**
2480 * igb_open - Called when a network interface is made active
2481 * @netdev: network interface device structure
2482 *
2483 * Returns 0 on success, negative value on failure
2484 *
2485 * The open entry point is called when a network interface is made
2486 * active by the system (IFF_UP). At this point all resources needed
2487 * for transmit and receive operations are allocated, the interrupt
2488 * handler is registered with the OS, the watchdog timer is started,
2489 * and the stack is notified that the interface is ready.
2490 **/
2491 static int __igb_open(struct net_device *netdev, bool resuming)
2492 {
2493 struct igb_adapter *adapter = netdev_priv(netdev);
2494 struct e1000_hw *hw = &adapter->hw;
2495 struct pci_dev *pdev = adapter->pdev;
2496 int err;
2497 int i;
2498
2499 /* disallow open during test */
2500 if (test_bit(__IGB_TESTING, &adapter->state)) {
2501 WARN_ON(resuming);
2502 return -EBUSY;
2503 }
2504
2505 if (!resuming)
2506 pm_runtime_get_sync(&pdev->dev);
2507
2508 netif_carrier_off(netdev);
2509
2510 /* allocate transmit descriptors */
2511 err = igb_setup_all_tx_resources(adapter);
2512 if (err)
2513 goto err_setup_tx;
2514
2515 /* allocate receive descriptors */
2516 err = igb_setup_all_rx_resources(adapter);
2517 if (err)
2518 goto err_setup_rx;
2519
2520 igb_power_up_link(adapter);
2521
2522 /* before we allocate an interrupt, we must be ready to handle it.
2523 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2524 * as soon as we call pci_request_irq, so we have to setup our
2525 * clean_rx handler before we do so. */
2526 igb_configure(adapter);
2527
2528 err = igb_request_irq(adapter);
2529 if (err)
2530 goto err_req_irq;
2531
2532 /* From here on the code is the same as igb_up() */
2533 clear_bit(__IGB_DOWN, &adapter->state);
2534
2535 for (i = 0; i < adapter->num_q_vectors; i++)
2536 napi_enable(&(adapter->q_vector[i]->napi));
2537
2538 /* Clear any pending interrupts. */
2539 rd32(E1000_ICR);
2540
2541 igb_irq_enable(adapter);
2542
2543 /* notify VFs that reset has been completed */
2544 if (adapter->vfs_allocated_count) {
2545 u32 reg_data = rd32(E1000_CTRL_EXT);
2546 reg_data |= E1000_CTRL_EXT_PFRSTD;
2547 wr32(E1000_CTRL_EXT, reg_data);
2548 }
2549
2550 netif_tx_start_all_queues(netdev);
2551
2552 if (!resuming)
2553 pm_runtime_put(&pdev->dev);
2554
2555 /* start the watchdog. */
2556 hw->mac.get_link_status = 1;
2557 schedule_work(&adapter->watchdog_task);
2558
2559 return 0;
2560
2561 err_req_irq:
2562 igb_release_hw_control(adapter);
2563 igb_power_down_link(adapter);
2564 igb_free_all_rx_resources(adapter);
2565 err_setup_rx:
2566 igb_free_all_tx_resources(adapter);
2567 err_setup_tx:
2568 igb_reset(adapter);
2569 if (!resuming)
2570 pm_runtime_put(&pdev->dev);
2571
2572 return err;
2573 }
2574
2575 static int igb_open(struct net_device *netdev)
2576 {
2577 return __igb_open(netdev, false);
2578 }
2579
2580 /**
2581 * igb_close - Disables a network interface
2582 * @netdev: network interface device structure
2583 *
2584 * Returns 0, this is not allowed to fail
2585 *
2586 * The close entry point is called when an interface is de-activated
2587 * by the OS. The hardware is still under the driver's control, but
2588 * needs to be disabled. A global MAC reset is issued to stop the
2589 * hardware, and all transmit and receive resources are freed.
2590 **/
2591 static int __igb_close(struct net_device *netdev, bool suspending)
2592 {
2593 struct igb_adapter *adapter = netdev_priv(netdev);
2594 struct pci_dev *pdev = adapter->pdev;
2595
2596 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2597
2598 if (!suspending)
2599 pm_runtime_get_sync(&pdev->dev);
2600
2601 igb_down(adapter);
2602 igb_free_irq(adapter);
2603
2604 igb_free_all_tx_resources(adapter);
2605 igb_free_all_rx_resources(adapter);
2606
2607 if (!suspending)
2608 pm_runtime_put_sync(&pdev->dev);
2609 return 0;
2610 }
2611
2612 static int igb_close(struct net_device *netdev)
2613 {
2614 return __igb_close(netdev, false);
2615 }
2616
2617 /**
2618 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2619 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2620 *
2621 * Return 0 on success, negative on failure
2622 **/
2623 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2624 {
2625 struct device *dev = tx_ring->dev;
2626 int orig_node = dev_to_node(dev);
2627 int size;
2628
2629 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2630 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2631 if (!tx_ring->tx_buffer_info)
2632 tx_ring->tx_buffer_info = vzalloc(size);
2633 if (!tx_ring->tx_buffer_info)
2634 goto err;
2635
2636 /* round up to nearest 4K */
2637 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2638 tx_ring->size = ALIGN(tx_ring->size, 4096);
2639
2640 set_dev_node(dev, tx_ring->numa_node);
2641 tx_ring->desc = dma_alloc_coherent(dev,
2642 tx_ring->size,
2643 &tx_ring->dma,
2644 GFP_KERNEL);
2645 set_dev_node(dev, orig_node);
2646 if (!tx_ring->desc)
2647 tx_ring->desc = dma_alloc_coherent(dev,
2648 tx_ring->size,
2649 &tx_ring->dma,
2650 GFP_KERNEL);
2651
2652 if (!tx_ring->desc)
2653 goto err;
2654
2655 tx_ring->next_to_use = 0;
2656 tx_ring->next_to_clean = 0;
2657
2658 return 0;
2659
2660 err:
2661 vfree(tx_ring->tx_buffer_info);
2662 dev_err(dev,
2663 "Unable to allocate memory for the transmit descriptor ring\n");
2664 return -ENOMEM;
2665 }
2666
2667 /**
2668 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2669 * (Descriptors) for all queues
2670 * @adapter: board private structure
2671 *
2672 * Return 0 on success, negative on failure
2673 **/
2674 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2675 {
2676 struct pci_dev *pdev = adapter->pdev;
2677 int i, err = 0;
2678
2679 for (i = 0; i < adapter->num_tx_queues; i++) {
2680 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2681 if (err) {
2682 dev_err(&pdev->dev,
2683 "Allocation for Tx Queue %u failed\n", i);
2684 for (i--; i >= 0; i--)
2685 igb_free_tx_resources(adapter->tx_ring[i]);
2686 break;
2687 }
2688 }
2689
2690 return err;
2691 }
2692
2693 /**
2694 * igb_setup_tctl - configure the transmit control registers
2695 * @adapter: Board private structure
2696 **/
2697 void igb_setup_tctl(struct igb_adapter *adapter)
2698 {
2699 struct e1000_hw *hw = &adapter->hw;
2700 u32 tctl;
2701
2702 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2703 wr32(E1000_TXDCTL(0), 0);
2704
2705 /* Program the Transmit Control Register */
2706 tctl = rd32(E1000_TCTL);
2707 tctl &= ~E1000_TCTL_CT;
2708 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2709 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2710
2711 igb_config_collision_dist(hw);
2712
2713 /* Enable transmits */
2714 tctl |= E1000_TCTL_EN;
2715
2716 wr32(E1000_TCTL, tctl);
2717 }
2718
2719 /**
2720 * igb_configure_tx_ring - Configure transmit ring after Reset
2721 * @adapter: board private structure
2722 * @ring: tx ring to configure
2723 *
2724 * Configure a transmit ring after a reset.
2725 **/
2726 void igb_configure_tx_ring(struct igb_adapter *adapter,
2727 struct igb_ring *ring)
2728 {
2729 struct e1000_hw *hw = &adapter->hw;
2730 u32 txdctl = 0;
2731 u64 tdba = ring->dma;
2732 int reg_idx = ring->reg_idx;
2733
2734 /* disable the queue */
2735 wr32(E1000_TXDCTL(reg_idx), 0);
2736 wrfl();
2737 mdelay(10);
2738
2739 wr32(E1000_TDLEN(reg_idx),
2740 ring->count * sizeof(union e1000_adv_tx_desc));
2741 wr32(E1000_TDBAL(reg_idx),
2742 tdba & 0x00000000ffffffffULL);
2743 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2744
2745 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2746 wr32(E1000_TDH(reg_idx), 0);
2747 writel(0, ring->tail);
2748
2749 txdctl |= IGB_TX_PTHRESH;
2750 txdctl |= IGB_TX_HTHRESH << 8;
2751 txdctl |= IGB_TX_WTHRESH << 16;
2752
2753 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2754 wr32(E1000_TXDCTL(reg_idx), txdctl);
2755
2756 netdev_tx_reset_queue(txring_txq(ring));
2757 }
2758
2759 /**
2760 * igb_configure_tx - Configure transmit Unit after Reset
2761 * @adapter: board private structure
2762 *
2763 * Configure the Tx unit of the MAC after a reset.
2764 **/
2765 static void igb_configure_tx(struct igb_adapter *adapter)
2766 {
2767 int i;
2768
2769 for (i = 0; i < adapter->num_tx_queues; i++)
2770 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2771 }
2772
2773 /**
2774 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2775 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2776 *
2777 * Returns 0 on success, negative on failure
2778 **/
2779 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2780 {
2781 struct device *dev = rx_ring->dev;
2782 int orig_node = dev_to_node(dev);
2783 int size, desc_len;
2784
2785 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2786 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2787 if (!rx_ring->rx_buffer_info)
2788 rx_ring->rx_buffer_info = vzalloc(size);
2789 if (!rx_ring->rx_buffer_info)
2790 goto err;
2791
2792 desc_len = sizeof(union e1000_adv_rx_desc);
2793
2794 /* Round up to nearest 4K */
2795 rx_ring->size = rx_ring->count * desc_len;
2796 rx_ring->size = ALIGN(rx_ring->size, 4096);
2797
2798 set_dev_node(dev, rx_ring->numa_node);
2799 rx_ring->desc = dma_alloc_coherent(dev,
2800 rx_ring->size,
2801 &rx_ring->dma,
2802 GFP_KERNEL);
2803 set_dev_node(dev, orig_node);
2804 if (!rx_ring->desc)
2805 rx_ring->desc = dma_alloc_coherent(dev,
2806 rx_ring->size,
2807 &rx_ring->dma,
2808 GFP_KERNEL);
2809
2810 if (!rx_ring->desc)
2811 goto err;
2812
2813 rx_ring->next_to_clean = 0;
2814 rx_ring->next_to_use = 0;
2815
2816 return 0;
2817
2818 err:
2819 vfree(rx_ring->rx_buffer_info);
2820 rx_ring->rx_buffer_info = NULL;
2821 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2822 " ring\n");
2823 return -ENOMEM;
2824 }
2825
2826 /**
2827 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2828 * (Descriptors) for all queues
2829 * @adapter: board private structure
2830 *
2831 * Return 0 on success, negative on failure
2832 **/
2833 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2834 {
2835 struct pci_dev *pdev = adapter->pdev;
2836 int i, err = 0;
2837
2838 for (i = 0; i < adapter->num_rx_queues; i++) {
2839 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2840 if (err) {
2841 dev_err(&pdev->dev,
2842 "Allocation for Rx Queue %u failed\n", i);
2843 for (i--; i >= 0; i--)
2844 igb_free_rx_resources(adapter->rx_ring[i]);
2845 break;
2846 }
2847 }
2848
2849 return err;
2850 }
2851
2852 /**
2853 * igb_setup_mrqc - configure the multiple receive queue control registers
2854 * @adapter: Board private structure
2855 **/
2856 static void igb_setup_mrqc(struct igb_adapter *adapter)
2857 {
2858 struct e1000_hw *hw = &adapter->hw;
2859 u32 mrqc, rxcsum;
2860 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2861 union e1000_reta {
2862 u32 dword;
2863 u8 bytes[4];
2864 } reta;
2865 static const u8 rsshash[40] = {
2866 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2867 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2868 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2869 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2870
2871 /* Fill out hash function seeds */
2872 for (j = 0; j < 10; j++) {
2873 u32 rsskey = rsshash[(j * 4)];
2874 rsskey |= rsshash[(j * 4) + 1] << 8;
2875 rsskey |= rsshash[(j * 4) + 2] << 16;
2876 rsskey |= rsshash[(j * 4) + 3] << 24;
2877 array_wr32(E1000_RSSRK(0), j, rsskey);
2878 }
2879
2880 num_rx_queues = adapter->rss_queues;
2881
2882 if (adapter->vfs_allocated_count) {
2883 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2884 switch (hw->mac.type) {
2885 case e1000_i350:
2886 case e1000_82580:
2887 num_rx_queues = 1;
2888 shift = 0;
2889 break;
2890 case e1000_82576:
2891 shift = 3;
2892 num_rx_queues = 2;
2893 break;
2894 case e1000_82575:
2895 shift = 2;
2896 shift2 = 6;
2897 default:
2898 break;
2899 }
2900 } else {
2901 if (hw->mac.type == e1000_82575)
2902 shift = 6;
2903 }
2904
2905 for (j = 0; j < (32 * 4); j++) {
2906 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2907 if (shift2)
2908 reta.bytes[j & 3] |= num_rx_queues << shift2;
2909 if ((j & 3) == 3)
2910 wr32(E1000_RETA(j >> 2), reta.dword);
2911 }
2912
2913 /*
2914 * Disable raw packet checksumming so that RSS hash is placed in
2915 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2916 * offloads as they are enabled by default
2917 */
2918 rxcsum = rd32(E1000_RXCSUM);
2919 rxcsum |= E1000_RXCSUM_PCSD;
2920
2921 if (adapter->hw.mac.type >= e1000_82576)
2922 /* Enable Receive Checksum Offload for SCTP */
2923 rxcsum |= E1000_RXCSUM_CRCOFL;
2924
2925 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2926 wr32(E1000_RXCSUM, rxcsum);
2927
2928 /* If VMDq is enabled then we set the appropriate mode for that, else
2929 * we default to RSS so that an RSS hash is calculated per packet even
2930 * if we are only using one queue */
2931 if (adapter->vfs_allocated_count) {
2932 if (hw->mac.type > e1000_82575) {
2933 /* Set the default pool for the PF's first queue */
2934 u32 vtctl = rd32(E1000_VT_CTL);
2935 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2936 E1000_VT_CTL_DISABLE_DEF_POOL);
2937 vtctl |= adapter->vfs_allocated_count <<
2938 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2939 wr32(E1000_VT_CTL, vtctl);
2940 }
2941 if (adapter->rss_queues > 1)
2942 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2943 else
2944 mrqc = E1000_MRQC_ENABLE_VMDQ;
2945 } else {
2946 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2947 }
2948 igb_vmm_control(adapter);
2949
2950 /*
2951 * Generate RSS hash based on TCP port numbers and/or
2952 * IPv4/v6 src and dst addresses since UDP cannot be
2953 * hashed reliably due to IP fragmentation
2954 */
2955 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2956 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2957 E1000_MRQC_RSS_FIELD_IPV6 |
2958 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2959 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2960
2961 wr32(E1000_MRQC, mrqc);
2962 }
2963
2964 /**
2965 * igb_setup_rctl - configure the receive control registers
2966 * @adapter: Board private structure
2967 **/
2968 void igb_setup_rctl(struct igb_adapter *adapter)
2969 {
2970 struct e1000_hw *hw = &adapter->hw;
2971 u32 rctl;
2972
2973 rctl = rd32(E1000_RCTL);
2974
2975 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2976 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2977
2978 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2979 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2980
2981 /*
2982 * enable stripping of CRC. It's unlikely this will break BMC
2983 * redirection as it did with e1000. Newer features require
2984 * that the HW strips the CRC.
2985 */
2986 rctl |= E1000_RCTL_SECRC;
2987
2988 /* disable store bad packets and clear size bits. */
2989 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2990
2991 /* enable LPE to prevent packets larger than max_frame_size */
2992 rctl |= E1000_RCTL_LPE;
2993
2994 /* disable queue 0 to prevent tail write w/o re-config */
2995 wr32(E1000_RXDCTL(0), 0);
2996
2997 /* Attention!!! For SR-IOV PF driver operations you must enable
2998 * queue drop for all VF and PF queues to prevent head of line blocking
2999 * if an un-trusted VF does not provide descriptors to hardware.
3000 */
3001 if (adapter->vfs_allocated_count) {
3002 /* set all queue drop enable bits */
3003 wr32(E1000_QDE, ALL_QUEUES);
3004 }
3005
3006 wr32(E1000_RCTL, rctl);
3007 }
3008
3009 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3010 int vfn)
3011 {
3012 struct e1000_hw *hw = &adapter->hw;
3013 u32 vmolr;
3014
3015 /* if it isn't the PF check to see if VFs are enabled and
3016 * increase the size to support vlan tags */
3017 if (vfn < adapter->vfs_allocated_count &&
3018 adapter->vf_data[vfn].vlans_enabled)
3019 size += VLAN_TAG_SIZE;
3020
3021 vmolr = rd32(E1000_VMOLR(vfn));
3022 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3023 vmolr |= size | E1000_VMOLR_LPE;
3024 wr32(E1000_VMOLR(vfn), vmolr);
3025
3026 return 0;
3027 }
3028
3029 /**
3030 * igb_rlpml_set - set maximum receive packet size
3031 * @adapter: board private structure
3032 *
3033 * Configure maximum receivable packet size.
3034 **/
3035 static void igb_rlpml_set(struct igb_adapter *adapter)
3036 {
3037 u32 max_frame_size = adapter->max_frame_size;
3038 struct e1000_hw *hw = &adapter->hw;
3039 u16 pf_id = adapter->vfs_allocated_count;
3040
3041 if (pf_id) {
3042 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3043 /*
3044 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3045 * to our max jumbo frame size, in case we need to enable
3046 * jumbo frames on one of the rings later.
3047 * This will not pass over-length frames into the default
3048 * queue because it's gated by the VMOLR.RLPML.
3049 */
3050 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3051 }
3052
3053 wr32(E1000_RLPML, max_frame_size);
3054 }
3055
3056 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3057 int vfn, bool aupe)
3058 {
3059 struct e1000_hw *hw = &adapter->hw;
3060 u32 vmolr;
3061
3062 /*
3063 * This register exists only on 82576 and newer so if we are older then
3064 * we should exit and do nothing
3065 */
3066 if (hw->mac.type < e1000_82576)
3067 return;
3068
3069 vmolr = rd32(E1000_VMOLR(vfn));
3070 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3071 if (aupe)
3072 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3073 else
3074 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3075
3076 /* clear all bits that might not be set */
3077 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3078
3079 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3080 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3081 /*
3082 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3083 * multicast packets
3084 */
3085 if (vfn <= adapter->vfs_allocated_count)
3086 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3087
3088 wr32(E1000_VMOLR(vfn), vmolr);
3089 }
3090
3091 /**
3092 * igb_configure_rx_ring - Configure a receive ring after Reset
3093 * @adapter: board private structure
3094 * @ring: receive ring to be configured
3095 *
3096 * Configure the Rx unit of the MAC after a reset.
3097 **/
3098 void igb_configure_rx_ring(struct igb_adapter *adapter,
3099 struct igb_ring *ring)
3100 {
3101 struct e1000_hw *hw = &adapter->hw;
3102 u64 rdba = ring->dma;
3103 int reg_idx = ring->reg_idx;
3104 u32 srrctl = 0, rxdctl = 0;
3105
3106 /* disable the queue */
3107 wr32(E1000_RXDCTL(reg_idx), 0);
3108
3109 /* Set DMA base address registers */
3110 wr32(E1000_RDBAL(reg_idx),
3111 rdba & 0x00000000ffffffffULL);
3112 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3113 wr32(E1000_RDLEN(reg_idx),
3114 ring->count * sizeof(union e1000_adv_rx_desc));
3115
3116 /* initialize head and tail */
3117 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3118 wr32(E1000_RDH(reg_idx), 0);
3119 writel(0, ring->tail);
3120
3121 /* set descriptor configuration */
3122 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3123 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3124 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3125 #else
3126 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3127 #endif
3128 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3129 if (hw->mac.type >= e1000_82580)
3130 srrctl |= E1000_SRRCTL_TIMESTAMP;
3131 /* Only set Drop Enable if we are supporting multiple queues */
3132 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3133 srrctl |= E1000_SRRCTL_DROP_EN;
3134
3135 wr32(E1000_SRRCTL(reg_idx), srrctl);
3136
3137 /* set filtering for VMDQ pools */
3138 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3139
3140 rxdctl |= IGB_RX_PTHRESH;
3141 rxdctl |= IGB_RX_HTHRESH << 8;
3142 rxdctl |= IGB_RX_WTHRESH << 16;
3143
3144 /* enable receive descriptor fetching */
3145 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3146 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3147 }
3148
3149 /**
3150 * igb_configure_rx - Configure receive Unit after Reset
3151 * @adapter: board private structure
3152 *
3153 * Configure the Rx unit of the MAC after a reset.
3154 **/
3155 static void igb_configure_rx(struct igb_adapter *adapter)
3156 {
3157 int i;
3158
3159 /* set UTA to appropriate mode */
3160 igb_set_uta(adapter);
3161
3162 /* set the correct pool for the PF default MAC address in entry 0 */
3163 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3164 adapter->vfs_allocated_count);
3165
3166 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3167 * the Base and Length of the Rx Descriptor Ring */
3168 for (i = 0; i < adapter->num_rx_queues; i++)
3169 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3170 }
3171
3172 /**
3173 * igb_free_tx_resources - Free Tx Resources per Queue
3174 * @tx_ring: Tx descriptor ring for a specific queue
3175 *
3176 * Free all transmit software resources
3177 **/
3178 void igb_free_tx_resources(struct igb_ring *tx_ring)
3179 {
3180 igb_clean_tx_ring(tx_ring);
3181
3182 vfree(tx_ring->tx_buffer_info);
3183 tx_ring->tx_buffer_info = NULL;
3184
3185 /* if not set, then don't free */
3186 if (!tx_ring->desc)
3187 return;
3188
3189 dma_free_coherent(tx_ring->dev, tx_ring->size,
3190 tx_ring->desc, tx_ring->dma);
3191
3192 tx_ring->desc = NULL;
3193 }
3194
3195 /**
3196 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3197 * @adapter: board private structure
3198 *
3199 * Free all transmit software resources
3200 **/
3201 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3202 {
3203 int i;
3204
3205 for (i = 0; i < adapter->num_tx_queues; i++)
3206 igb_free_tx_resources(adapter->tx_ring[i]);
3207 }
3208
3209 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3210 struct igb_tx_buffer *tx_buffer)
3211 {
3212 if (tx_buffer->skb) {
3213 dev_kfree_skb_any(tx_buffer->skb);
3214 if (tx_buffer->dma)
3215 dma_unmap_single(ring->dev,
3216 tx_buffer->dma,
3217 tx_buffer->length,
3218 DMA_TO_DEVICE);
3219 } else if (tx_buffer->dma) {
3220 dma_unmap_page(ring->dev,
3221 tx_buffer->dma,
3222 tx_buffer->length,
3223 DMA_TO_DEVICE);
3224 }
3225 tx_buffer->next_to_watch = NULL;
3226 tx_buffer->skb = NULL;
3227 tx_buffer->dma = 0;
3228 /* buffer_info must be completely set up in the transmit path */
3229 }
3230
3231 /**
3232 * igb_clean_tx_ring - Free Tx Buffers
3233 * @tx_ring: ring to be cleaned
3234 **/
3235 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3236 {
3237 struct igb_tx_buffer *buffer_info;
3238 unsigned long size;
3239 u16 i;
3240
3241 if (!tx_ring->tx_buffer_info)
3242 return;
3243 /* Free all the Tx ring sk_buffs */
3244
3245 for (i = 0; i < tx_ring->count; i++) {
3246 buffer_info = &tx_ring->tx_buffer_info[i];
3247 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3248 }
3249
3250 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3251 memset(tx_ring->tx_buffer_info, 0, size);
3252
3253 /* Zero out the descriptor ring */
3254 memset(tx_ring->desc, 0, tx_ring->size);
3255
3256 tx_ring->next_to_use = 0;
3257 tx_ring->next_to_clean = 0;
3258 }
3259
3260 /**
3261 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3262 * @adapter: board private structure
3263 **/
3264 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3265 {
3266 int i;
3267
3268 for (i = 0; i < adapter->num_tx_queues; i++)
3269 igb_clean_tx_ring(adapter->tx_ring[i]);
3270 }
3271
3272 /**
3273 * igb_free_rx_resources - Free Rx Resources
3274 * @rx_ring: ring to clean the resources from
3275 *
3276 * Free all receive software resources
3277 **/
3278 void igb_free_rx_resources(struct igb_ring *rx_ring)
3279 {
3280 igb_clean_rx_ring(rx_ring);
3281
3282 vfree(rx_ring->rx_buffer_info);
3283 rx_ring->rx_buffer_info = NULL;
3284
3285 /* if not set, then don't free */
3286 if (!rx_ring->desc)
3287 return;
3288
3289 dma_free_coherent(rx_ring->dev, rx_ring->size,
3290 rx_ring->desc, rx_ring->dma);
3291
3292 rx_ring->desc = NULL;
3293 }
3294
3295 /**
3296 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3297 * @adapter: board private structure
3298 *
3299 * Free all receive software resources
3300 **/
3301 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3302 {
3303 int i;
3304
3305 for (i = 0; i < adapter->num_rx_queues; i++)
3306 igb_free_rx_resources(adapter->rx_ring[i]);
3307 }
3308
3309 /**
3310 * igb_clean_rx_ring - Free Rx Buffers per Queue
3311 * @rx_ring: ring to free buffers from
3312 **/
3313 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3314 {
3315 unsigned long size;
3316 u16 i;
3317
3318 if (!rx_ring->rx_buffer_info)
3319 return;
3320
3321 /* Free all the Rx ring sk_buffs */
3322 for (i = 0; i < rx_ring->count; i++) {
3323 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3324 if (buffer_info->dma) {
3325 dma_unmap_single(rx_ring->dev,
3326 buffer_info->dma,
3327 IGB_RX_HDR_LEN,
3328 DMA_FROM_DEVICE);
3329 buffer_info->dma = 0;
3330 }
3331
3332 if (buffer_info->skb) {
3333 dev_kfree_skb(buffer_info->skb);
3334 buffer_info->skb = NULL;
3335 }
3336 if (buffer_info->page_dma) {
3337 dma_unmap_page(rx_ring->dev,
3338 buffer_info->page_dma,
3339 PAGE_SIZE / 2,
3340 DMA_FROM_DEVICE);
3341 buffer_info->page_dma = 0;
3342 }
3343 if (buffer_info->page) {
3344 put_page(buffer_info->page);
3345 buffer_info->page = NULL;
3346 buffer_info->page_offset = 0;
3347 }
3348 }
3349
3350 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3351 memset(rx_ring->rx_buffer_info, 0, size);
3352
3353 /* Zero out the descriptor ring */
3354 memset(rx_ring->desc, 0, rx_ring->size);
3355
3356 rx_ring->next_to_clean = 0;
3357 rx_ring->next_to_use = 0;
3358 }
3359
3360 /**
3361 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3362 * @adapter: board private structure
3363 **/
3364 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3365 {
3366 int i;
3367
3368 for (i = 0; i < adapter->num_rx_queues; i++)
3369 igb_clean_rx_ring(adapter->rx_ring[i]);
3370 }
3371
3372 /**
3373 * igb_set_mac - Change the Ethernet Address of the NIC
3374 * @netdev: network interface device structure
3375 * @p: pointer to an address structure
3376 *
3377 * Returns 0 on success, negative on failure
3378 **/
3379 static int igb_set_mac(struct net_device *netdev, void *p)
3380 {
3381 struct igb_adapter *adapter = netdev_priv(netdev);
3382 struct e1000_hw *hw = &adapter->hw;
3383 struct sockaddr *addr = p;
3384
3385 if (!is_valid_ether_addr(addr->sa_data))
3386 return -EADDRNOTAVAIL;
3387
3388 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3389 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3390
3391 /* set the correct pool for the new PF MAC address in entry 0 */
3392 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3393 adapter->vfs_allocated_count);
3394
3395 return 0;
3396 }
3397
3398 /**
3399 * igb_write_mc_addr_list - write multicast addresses to MTA
3400 * @netdev: network interface device structure
3401 *
3402 * Writes multicast address list to the MTA hash table.
3403 * Returns: -ENOMEM on failure
3404 * 0 on no addresses written
3405 * X on writing X addresses to MTA
3406 **/
3407 static int igb_write_mc_addr_list(struct net_device *netdev)
3408 {
3409 struct igb_adapter *adapter = netdev_priv(netdev);
3410 struct e1000_hw *hw = &adapter->hw;
3411 struct netdev_hw_addr *ha;
3412 u8 *mta_list;
3413 int i;
3414
3415 if (netdev_mc_empty(netdev)) {
3416 /* nothing to program, so clear mc list */
3417 igb_update_mc_addr_list(hw, NULL, 0);
3418 igb_restore_vf_multicasts(adapter);
3419 return 0;
3420 }
3421
3422 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3423 if (!mta_list)
3424 return -ENOMEM;
3425
3426 /* The shared function expects a packed array of only addresses. */
3427 i = 0;
3428 netdev_for_each_mc_addr(ha, netdev)
3429 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3430
3431 igb_update_mc_addr_list(hw, mta_list, i);
3432 kfree(mta_list);
3433
3434 return netdev_mc_count(netdev);
3435 }
3436
3437 /**
3438 * igb_write_uc_addr_list - write unicast addresses to RAR table
3439 * @netdev: network interface device structure
3440 *
3441 * Writes unicast address list to the RAR table.
3442 * Returns: -ENOMEM on failure/insufficient address space
3443 * 0 on no addresses written
3444 * X on writing X addresses to the RAR table
3445 **/
3446 static int igb_write_uc_addr_list(struct net_device *netdev)
3447 {
3448 struct igb_adapter *adapter = netdev_priv(netdev);
3449 struct e1000_hw *hw = &adapter->hw;
3450 unsigned int vfn = adapter->vfs_allocated_count;
3451 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3452 int count = 0;
3453
3454 /* return ENOMEM indicating insufficient memory for addresses */
3455 if (netdev_uc_count(netdev) > rar_entries)
3456 return -ENOMEM;
3457
3458 if (!netdev_uc_empty(netdev) && rar_entries) {
3459 struct netdev_hw_addr *ha;
3460
3461 netdev_for_each_uc_addr(ha, netdev) {
3462 if (!rar_entries)
3463 break;
3464 igb_rar_set_qsel(adapter, ha->addr,
3465 rar_entries--,
3466 vfn);
3467 count++;
3468 }
3469 }
3470 /* write the addresses in reverse order to avoid write combining */
3471 for (; rar_entries > 0 ; rar_entries--) {
3472 wr32(E1000_RAH(rar_entries), 0);
3473 wr32(E1000_RAL(rar_entries), 0);
3474 }
3475 wrfl();
3476
3477 return count;
3478 }
3479
3480 /**
3481 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3482 * @netdev: network interface device structure
3483 *
3484 * The set_rx_mode entry point is called whenever the unicast or multicast
3485 * address lists or the network interface flags are updated. This routine is
3486 * responsible for configuring the hardware for proper unicast, multicast,
3487 * promiscuous mode, and all-multi behavior.
3488 **/
3489 static void igb_set_rx_mode(struct net_device *netdev)
3490 {
3491 struct igb_adapter *adapter = netdev_priv(netdev);
3492 struct e1000_hw *hw = &adapter->hw;
3493 unsigned int vfn = adapter->vfs_allocated_count;
3494 u32 rctl, vmolr = 0;
3495 int count;
3496
3497 /* Check for Promiscuous and All Multicast modes */
3498 rctl = rd32(E1000_RCTL);
3499
3500 /* clear the effected bits */
3501 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3502
3503 if (netdev->flags & IFF_PROMISC) {
3504 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3505 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3506 } else {
3507 if (netdev->flags & IFF_ALLMULTI) {
3508 rctl |= E1000_RCTL_MPE;
3509 vmolr |= E1000_VMOLR_MPME;
3510 } else {
3511 /*
3512 * Write addresses to the MTA, if the attempt fails
3513 * then we should just turn on promiscuous mode so
3514 * that we can at least receive multicast traffic
3515 */
3516 count = igb_write_mc_addr_list(netdev);
3517 if (count < 0) {
3518 rctl |= E1000_RCTL_MPE;
3519 vmolr |= E1000_VMOLR_MPME;
3520 } else if (count) {
3521 vmolr |= E1000_VMOLR_ROMPE;
3522 }
3523 }
3524 /*
3525 * Write addresses to available RAR registers, if there is not
3526 * sufficient space to store all the addresses then enable
3527 * unicast promiscuous mode
3528 */
3529 count = igb_write_uc_addr_list(netdev);
3530 if (count < 0) {
3531 rctl |= E1000_RCTL_UPE;
3532 vmolr |= E1000_VMOLR_ROPE;
3533 }
3534 rctl |= E1000_RCTL_VFE;
3535 }
3536 wr32(E1000_RCTL, rctl);
3537
3538 /*
3539 * In order to support SR-IOV and eventually VMDq it is necessary to set
3540 * the VMOLR to enable the appropriate modes. Without this workaround
3541 * we will have issues with VLAN tag stripping not being done for frames
3542 * that are only arriving because we are the default pool
3543 */
3544 if (hw->mac.type < e1000_82576)
3545 return;
3546
3547 vmolr |= rd32(E1000_VMOLR(vfn)) &
3548 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3549 wr32(E1000_VMOLR(vfn), vmolr);
3550 igb_restore_vf_multicasts(adapter);
3551 }
3552
3553 static void igb_check_wvbr(struct igb_adapter *adapter)
3554 {
3555 struct e1000_hw *hw = &adapter->hw;
3556 u32 wvbr = 0;
3557
3558 switch (hw->mac.type) {
3559 case e1000_82576:
3560 case e1000_i350:
3561 if (!(wvbr = rd32(E1000_WVBR)))
3562 return;
3563 break;
3564 default:
3565 break;
3566 }
3567
3568 adapter->wvbr |= wvbr;
3569 }
3570
3571 #define IGB_STAGGERED_QUEUE_OFFSET 8
3572
3573 static void igb_spoof_check(struct igb_adapter *adapter)
3574 {
3575 int j;
3576
3577 if (!adapter->wvbr)
3578 return;
3579
3580 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3581 if (adapter->wvbr & (1 << j) ||
3582 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3583 dev_warn(&adapter->pdev->dev,
3584 "Spoof event(s) detected on VF %d\n", j);
3585 adapter->wvbr &=
3586 ~((1 << j) |
3587 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3588 }
3589 }
3590 }
3591
3592 /* Need to wait a few seconds after link up to get diagnostic information from
3593 * the phy */
3594 static void igb_update_phy_info(unsigned long data)
3595 {
3596 struct igb_adapter *adapter = (struct igb_adapter *) data;
3597 igb_get_phy_info(&adapter->hw);
3598 }
3599
3600 /**
3601 * igb_has_link - check shared code for link and determine up/down
3602 * @adapter: pointer to driver private info
3603 **/
3604 bool igb_has_link(struct igb_adapter *adapter)
3605 {
3606 struct e1000_hw *hw = &adapter->hw;
3607 bool link_active = false;
3608 s32 ret_val = 0;
3609
3610 /* get_link_status is set on LSC (link status) interrupt or
3611 * rx sequence error interrupt. get_link_status will stay
3612 * false until the e1000_check_for_link establishes link
3613 * for copper adapters ONLY
3614 */
3615 switch (hw->phy.media_type) {
3616 case e1000_media_type_copper:
3617 if (hw->mac.get_link_status) {
3618 ret_val = hw->mac.ops.check_for_link(hw);
3619 link_active = !hw->mac.get_link_status;
3620 } else {
3621 link_active = true;
3622 }
3623 break;
3624 case e1000_media_type_internal_serdes:
3625 ret_val = hw->mac.ops.check_for_link(hw);
3626 link_active = hw->mac.serdes_has_link;
3627 break;
3628 default:
3629 case e1000_media_type_unknown:
3630 break;
3631 }
3632
3633 return link_active;
3634 }
3635
3636 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3637 {
3638 bool ret = false;
3639 u32 ctrl_ext, thstat;
3640
3641 /* check for thermal sensor event on i350, copper only */
3642 if (hw->mac.type == e1000_i350) {
3643 thstat = rd32(E1000_THSTAT);
3644 ctrl_ext = rd32(E1000_CTRL_EXT);
3645
3646 if ((hw->phy.media_type == e1000_media_type_copper) &&
3647 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3648 ret = !!(thstat & event);
3649 }
3650 }
3651
3652 return ret;
3653 }
3654
3655 /**
3656 * igb_watchdog - Timer Call-back
3657 * @data: pointer to adapter cast into an unsigned long
3658 **/
3659 static void igb_watchdog(unsigned long data)
3660 {
3661 struct igb_adapter *adapter = (struct igb_adapter *)data;
3662 /* Do the rest outside of interrupt context */
3663 schedule_work(&adapter->watchdog_task);
3664 }
3665
/**
 * igb_watchdog_task - periodic link and housekeeping work
 * @work: the watchdog_task work item embedded in the adapter
 *
 * Detects link up/down transitions and updates carrier state, link
 * parameters and runtime PM accordingly, refreshes the statistics,
 * schedules a reset when Tx work is stranded without link, arms Tx hang
 * detection, fires a software interrupt, runs the spoof check, and finally
 * re-arms the watchdog timer unless the interface is going down.
 **/
static void igb_watchdog_task(struct work_struct *work)
{
	struct igb_adapter *adapter = container_of(work,
						   struct igb_adapter,
						   watchdog_task);
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	u32 link;
	int i;

	link = igb_has_link(adapter);
	if (link) {
		/* Cancel scheduled suspend requests. */
		pm_runtime_resume(netdev->dev.parent);

		/* carrier still off: this is a down -> up transition */
		if (!netif_carrier_ok(netdev)) {
			u32 ctrl;
			hw->mac.ops.get_speed_and_duplex(hw,
							 &adapter->link_speed,
							 &adapter->link_duplex);

			ctrl = rd32(E1000_CTRL);
			/* Links status message must follow this format */
			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
			       "Duplex, Flow Control: %s\n",
			       netdev->name,
			       adapter->link_speed,
			       adapter->link_duplex == FULL_DUPLEX ?
			       "Full" : "Half",
			       (ctrl & E1000_CTRL_TFCE) &&
			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");

			/* check for thermal sensor event */
			if (igb_thermal_sensor_event(hw,
			    E1000_THSTAT_LINK_THROTTLE)) {
				netdev_info(netdev, "The network adapter link "
					    "speed was downshifted because it "
					    "overheated\n");
			}

			/* adjust timeout factor according to speed/duplex */
			adapter->tx_timeout_factor = 1;
			switch (adapter->link_speed) {
			case SPEED_10:
				adapter->tx_timeout_factor = 14;
				break;
			case SPEED_100:
				/* maybe add some timeout factor ? */
				break;
			}

			netif_carrier_on(netdev);

			igb_ping_all_vfs(adapter);
			igb_check_vf_rate_limit(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));
		}
	} else {
		/* carrier still on: this is an up -> down transition */
		if (netif_carrier_ok(netdev)) {
			adapter->link_speed = 0;
			adapter->link_duplex = 0;

			/* check for thermal sensor event */
			if (igb_thermal_sensor_event(hw,
			    E1000_THSTAT_PWR_DOWN)) {
				netdev_err(netdev, "The network adapter was "
					   "stopped because it overheated\n");
			}

			/* Links status message must follow this format */
			printk(KERN_INFO "igb: %s NIC Link is Down\n",
			       netdev->name);
			netif_carrier_off(netdev);

			igb_ping_all_vfs(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));

			/* request a runtime suspend 5 seconds from now */
			pm_schedule_suspend(netdev->dev.parent,
					    MSEC_PER_SEC * 5);
		}
	}

	/* statistics are updated under the stats64 lock */
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *tx_ring = adapter->tx_ring[i];
		if (!netif_carrier_ok(netdev)) {
			/* We've lost link, so the controller stops DMA,
			 * but we've got queued Tx work that's never going
			 * to get done, so reset controller to flush Tx.
			 * (Do the reset outside of interrupt context). */
			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
				adapter->tx_timeout_count++;
				schedule_work(&adapter->reset_task);
				/* return immediately since reset is imminent */
				return;
			}
		}

		/* Force detection of hung controller every watchdog period */
		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
	}

	/* Cause software interrupt to ensure rx ring is cleaned */
	if (adapter->msix_entries) {
		/* MSI-X: trigger every queue vector through EICS */
		u32 eics = 0;
		for (i = 0; i < adapter->num_q_vectors; i++)
			eics |= adapter->q_vector[i]->eims_value;
		wr32(E1000_EICS, eics);
	} else {
		wr32(E1000_ICS, E1000_ICS_RXDMT0);
	}

	igb_spoof_check(adapter);

	/* Reset the timer */
	if (!test_bit(__IGB_DOWN, &adapter->state))
		mod_timer(&adapter->watchdog_timer,
			  round_jiffies(jiffies + 2 * HZ));
}
3798
/*
 * Interrupt throttling classes kept in a ring container's itr field.
 * igb_update_itr() moves a ring between the first three values and
 * igb_set_itr() translates them into ITR register settings.
 */
enum latency_range {
	lowest_latency = 0,	/* igb_set_itr() maps this to IGB_70K_ITR */
	low_latency = 1,	/* igb_set_itr() maps this to IGB_20K_ITR */
	bulk_latency = 2,	/* igb_set_itr() maps this to IGB_4K_ITR */
	latency_invalid = 255	/* NOTE(review): no user in the nearby code */
};
3805
3806 /**
3807 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3808 *
3809 * Stores a new ITR value based on strictly on packet size. This
3810 * algorithm is less sophisticated than that used in igb_update_itr,
3811 * due to the difficulty of synchronizing statistics across multiple
3812 * receive rings. The divisors and thresholds used by this function
3813 * were determined based on theoretical maximum wire speed and testing
3814 * data, in order to minimize response time while increasing bulk
3815 * throughput.
3816 * This functionality is controlled by the InterruptThrottleRate module
3817 * parameter (see igb_param.c)
3818 * NOTE: This function is called only when operating in a multiqueue
3819 * receive environment.
3820 * @q_vector: pointer to q_vector
3821 **/
3822 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3823 {
3824 int new_val = q_vector->itr_val;
3825 int avg_wire_size = 0;
3826 struct igb_adapter *adapter = q_vector->adapter;
3827 unsigned int packets;
3828
3829 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3830 * ints/sec - ITR timer value of 120 ticks.
3831 */
3832 if (adapter->link_speed != SPEED_1000) {
3833 new_val = IGB_4K_ITR;
3834 goto set_itr_val;
3835 }
3836
3837 packets = q_vector->rx.total_packets;
3838 if (packets)
3839 avg_wire_size = q_vector->rx.total_bytes / packets;
3840
3841 packets = q_vector->tx.total_packets;
3842 if (packets)
3843 avg_wire_size = max_t(u32, avg_wire_size,
3844 q_vector->tx.total_bytes / packets);
3845
3846 /* if avg_wire_size isn't set no work was done */
3847 if (!avg_wire_size)
3848 goto clear_counts;
3849
3850 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3851 avg_wire_size += 24;
3852
3853 /* Don't starve jumbo frames */
3854 avg_wire_size = min(avg_wire_size, 3000);
3855
3856 /* Give a little boost to mid-size frames */
3857 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3858 new_val = avg_wire_size / 3;
3859 else
3860 new_val = avg_wire_size / 2;
3861
3862 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3863 if (new_val < IGB_20K_ITR &&
3864 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3865 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3866 new_val = IGB_20K_ITR;
3867
3868 set_itr_val:
3869 if (new_val != q_vector->itr_val) {
3870 q_vector->itr_val = new_val;
3871 q_vector->set_itr = 1;
3872 }
3873 clear_counts:
3874 q_vector->rx.total_bytes = 0;
3875 q_vector->rx.total_packets = 0;
3876 q_vector->tx.total_bytes = 0;
3877 q_vector->tx.total_packets = 0;
3878 }
3879
3880 /**
3881 * igb_update_itr - update the dynamic ITR value based on statistics
3882 * Stores a new ITR value based on packets and byte
3883 * counts during the last interrupt. The advantage of per interrupt
3884 * computation is faster updates and more accurate ITR for the current
3885 * traffic pattern. Constants in this function were computed
3886 * based on theoretical maximum wire speed and thresholds were set based
3887 * on testing data as well as attempting to minimize response time
3888 * while increasing bulk throughput.
3889 * this functionality is controlled by the InterruptThrottleRate module
3890 * parameter (see igb_param.c)
3891 * NOTE: These calculations are only valid when operating in a single-
3892 * queue environment.
3893 * @q_vector: pointer to q_vector
3894 * @ring_container: ring info to update the itr for
3895 **/
3896 static void igb_update_itr(struct igb_q_vector *q_vector,
3897 struct igb_ring_container *ring_container)
3898 {
3899 unsigned int packets = ring_container->total_packets;
3900 unsigned int bytes = ring_container->total_bytes;
3901 u8 itrval = ring_container->itr;
3902
3903 /* no packets, exit with status unchanged */
3904 if (packets == 0)
3905 return;
3906
3907 switch (itrval) {
3908 case lowest_latency:
3909 /* handle TSO and jumbo frames */
3910 if (bytes/packets > 8000)
3911 itrval = bulk_latency;
3912 else if ((packets < 5) && (bytes > 512))
3913 itrval = low_latency;
3914 break;
3915 case low_latency: /* 50 usec aka 20000 ints/s */
3916 if (bytes > 10000) {
3917 /* this if handles the TSO accounting */
3918 if (bytes/packets > 8000) {
3919 itrval = bulk_latency;
3920 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3921 itrval = bulk_latency;
3922 } else if ((packets > 35)) {
3923 itrval = lowest_latency;
3924 }
3925 } else if (bytes/packets > 2000) {
3926 itrval = bulk_latency;
3927 } else if (packets <= 2 && bytes < 512) {
3928 itrval = lowest_latency;
3929 }
3930 break;
3931 case bulk_latency: /* 250 usec aka 4000 ints/s */
3932 if (bytes > 25000) {
3933 if (packets > 35)
3934 itrval = low_latency;
3935 } else if (bytes < 1500) {
3936 itrval = low_latency;
3937 }
3938 break;
3939 }
3940
3941 /* clear work counters since we have the values we need */
3942 ring_container->total_bytes = 0;
3943 ring_container->total_packets = 0;
3944
3945 /* write updated itr to ring container */
3946 ring_container->itr = itrval;
3947 }
3948
3949 static void igb_set_itr(struct igb_q_vector *q_vector)
3950 {
3951 struct igb_adapter *adapter = q_vector->adapter;
3952 u32 new_itr = q_vector->itr_val;
3953 u8 current_itr = 0;
3954
3955 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3956 if (adapter->link_speed != SPEED_1000) {
3957 current_itr = 0;
3958 new_itr = IGB_4K_ITR;
3959 goto set_itr_now;
3960 }
3961
3962 igb_update_itr(q_vector, &q_vector->tx);
3963 igb_update_itr(q_vector, &q_vector->rx);
3964
3965 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3966
3967 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3968 if (current_itr == lowest_latency &&
3969 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3970 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3971 current_itr = low_latency;
3972
3973 switch (current_itr) {
3974 /* counts and packets in update_itr are dependent on these numbers */
3975 case lowest_latency:
3976 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3977 break;
3978 case low_latency:
3979 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3980 break;
3981 case bulk_latency:
3982 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3983 break;
3984 default:
3985 break;
3986 }
3987
3988 set_itr_now:
3989 if (new_itr != q_vector->itr_val) {
3990 /* this attempts to bias the interrupt rate towards Bulk
3991 * by adding intermediate steps when interrupt rate is
3992 * increasing */
3993 new_itr = new_itr > q_vector->itr_val ?
3994 max((new_itr * q_vector->itr_val) /
3995 (new_itr + (q_vector->itr_val >> 2)),
3996 new_itr) :
3997 new_itr;
3998 /* Don't write the value here; it resets the adapter's
3999 * internal timer, and causes us to delay far longer than
4000 * we should between interrupts. Instead, we write the ITR
4001 * value at the beginning of the next interrupt so the timing
4002 * ends up being correct.
4003 */
4004 q_vector->itr_val = new_itr;
4005 q_vector->set_itr = 1;
4006 }
4007 }
4008
4009 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4010 u32 type_tucmd, u32 mss_l4len_idx)
4011 {
4012 struct e1000_adv_tx_context_desc *context_desc;
4013 u16 i = tx_ring->next_to_use;
4014
4015 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4016
4017 i++;
4018 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4019
4020 /* set bits to identify this as an advanced context descriptor */
4021 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4022
4023 /* For 82575, context index must be unique per ring. */
4024 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4025 mss_l4len_idx |= tx_ring->reg_idx << 4;
4026
4027 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4028 context_desc->seqnum_seed = 0;
4029 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4030 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4031 }
4032
/**
 * igb_tso - prepare a GSO skb and its context descriptor
 * @tx_ring: ring the frame will be sent on
 * @first: first Tx buffer of the frame; its skb, protocol, tx_flags,
 *         gso_segs and bytecount are read and/or updated
 * @hdr_len: output, combined length of all headers up to and including
 *           the TCP header
 *
 * Returns 0 when the skb is not GSO (nothing is done), a negative error
 * code when the cloned header could not be made writable, and 1 after a
 * context descriptor has been written.
 **/
static int igb_tso(struct igb_ring *tx_ring,
		   struct igb_tx_buffer *first,
		   u8 *hdr_len)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens, type_tucmd;
	u32 mss_l4len_idx, l4len;

	if (!skb_is_gso(skb))
		return 0;

	/* the headers are modified below, so a private copy is needed */
	if (skb_header_cloned(skb)) {
		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (err)
			return err;
	}

	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;

	if (first->protocol == __constant_htons(ETH_P_IP)) {
		struct iphdr *iph = ip_hdr(skb);
		/*
		 * Length and IP checksum are zeroed and the TCP checksum is
		 * seeded with a pseudo-header sum computed for length 0;
		 * presumably the hardware completes them per segment.
		 */
		iph->tot_len = 0;
		iph->check = 0;
		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
							 iph->daddr, 0,
							 IPPROTO_TCP,
							 0);
		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
		first->tx_flags |= IGB_TX_FLAGS_TSO |
				   IGB_TX_FLAGS_CSUM |
				   IGB_TX_FLAGS_IPV4;
	} else if (skb_is_gso_v6(skb)) {
		/* same seeding as above, for the IPv6 pseudo-header */
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						       &ipv6_hdr(skb)->daddr,
						       0, IPPROTO_TCP, 0);
		first->tx_flags |= IGB_TX_FLAGS_TSO |
				   IGB_TX_FLAGS_CSUM;
	}

	/* compute header lengths */
	l4len = tcp_hdrlen(skb);
	*hdr_len = skb_transport_offset(skb) + l4len;

	/* update gso size and bytecount with header size */
	first->gso_segs = skb_shinfo(skb)->gso_segs;
	/* every segment after the first adds one more copy of the headers */
	first->bytecount += (first->gso_segs - 1) * *hdr_len;

	/* MSS L4LEN IDX */
	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;

	/* VLAN MACLEN IPLEN */
	vlan_macip_lens = skb_network_header_len(skb);
	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);

	return 1;
}
4095
4096 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4097 {
4098 struct sk_buff *skb = first->skb;
4099 u32 vlan_macip_lens = 0;
4100 u32 mss_l4len_idx = 0;
4101 u32 type_tucmd = 0;
4102
4103 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4104 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4105 return;
4106 } else {
4107 u8 l4_hdr = 0;
4108 switch (first->protocol) {
4109 case __constant_htons(ETH_P_IP):
4110 vlan_macip_lens |= skb_network_header_len(skb);
4111 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4112 l4_hdr = ip_hdr(skb)->protocol;
4113 break;
4114 case __constant_htons(ETH_P_IPV6):
4115 vlan_macip_lens |= skb_network_header_len(skb);
4116 l4_hdr = ipv6_hdr(skb)->nexthdr;
4117 break;
4118 default:
4119 if (unlikely(net_ratelimit())) {
4120 dev_warn(tx_ring->dev,
4121 "partial checksum but proto=%x!\n",
4122 first->protocol);
4123 }
4124 break;
4125 }
4126
4127 switch (l4_hdr) {
4128 case IPPROTO_TCP:
4129 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4130 mss_l4len_idx = tcp_hdrlen(skb) <<
4131 E1000_ADVTXD_L4LEN_SHIFT;
4132 break;
4133 case IPPROTO_SCTP:
4134 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4135 mss_l4len_idx = sizeof(struct sctphdr) <<
4136 E1000_ADVTXD_L4LEN_SHIFT;
4137 break;
4138 case IPPROTO_UDP:
4139 mss_l4len_idx = sizeof(struct udphdr) <<
4140 E1000_ADVTXD_L4LEN_SHIFT;
4141 break;
4142 default:
4143 if (unlikely(net_ratelimit())) {
4144 dev_warn(tx_ring->dev,
4145 "partial checksum but l4 proto=%x!\n",
4146 l4_hdr);
4147 }
4148 break;
4149 }
4150
4151 /* update TX checksum flag */
4152 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4153 }
4154
4155 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4156 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4157
4158 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4159 }
4160
4161 static __le32 igb_tx_cmd_type(u32 tx_flags)
4162 {
4163 /* set type for advanced descriptor with frame checksum insertion */
4164 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4165 E1000_ADVTXD_DCMD_IFCS |
4166 E1000_ADVTXD_DCMD_DEXT);
4167
4168 /* set HW vlan bit if vlan is present */
4169 if (tx_flags & IGB_TX_FLAGS_VLAN)
4170 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4171
4172 /* set timestamp bit if present */
4173 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4174 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4175
4176 /* set segmentation bits for TSO */
4177 if (tx_flags & IGB_TX_FLAGS_TSO)
4178 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4179
4180 return cmd_type;
4181 }
4182
4183 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4184 union e1000_adv_tx_desc *tx_desc,
4185 u32 tx_flags, unsigned int paylen)
4186 {
4187 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4188
4189 /* 82575 requires a unique index per ring if any offload is enabled */
4190 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4191 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4192 olinfo_status |= tx_ring->reg_idx << 4;
4193
4194 /* insert L4 checksum */
4195 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4196 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4197
4198 /* insert IPv4 checksum */
4199 if (tx_flags & IGB_TX_FLAGS_IPV4)
4200 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4201 }
4202
4203 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4204 }
4205
4206 /*
4207 * The largest size we can write to the descriptor is 65535. In order to
4208 * maintain a power of two alignment we have to limit ourselves to 32K.
4209 */
4210 #define IGB_MAX_TXD_PWR 15
4211 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4212
4213 static void igb_tx_map(struct igb_ring *tx_ring,
4214 struct igb_tx_buffer *first,
4215 const u8 hdr_len)
4216 {
4217 struct sk_buff *skb = first->skb;
4218 struct igb_tx_buffer *tx_buffer_info;
4219 union e1000_adv_tx_desc *tx_desc;
4220 dma_addr_t dma;
4221 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4222 unsigned int data_len = skb->data_len;
4223 unsigned int size = skb_headlen(skb);
4224 unsigned int paylen = skb->len - hdr_len;
4225 __le32 cmd_type;
4226 u32 tx_flags = first->tx_flags;
4227 u16 i = tx_ring->next_to_use;
4228
4229 tx_desc = IGB_TX_DESC(tx_ring, i);
4230
4231 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4232 cmd_type = igb_tx_cmd_type(tx_flags);
4233
4234 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4235 if (dma_mapping_error(tx_ring->dev, dma))
4236 goto dma_error;
4237
4238 /* record length, and DMA address */
4239 first->length = size;
4240 first->dma = dma;
4241 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4242
4243 for (;;) {
4244 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4245 tx_desc->read.cmd_type_len =
4246 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4247
4248 i++;
4249 tx_desc++;
4250 if (i == tx_ring->count) {
4251 tx_desc = IGB_TX_DESC(tx_ring, 0);
4252 i = 0;
4253 }
4254
4255 dma += IGB_MAX_DATA_PER_TXD;
4256 size -= IGB_MAX_DATA_PER_TXD;
4257
4258 tx_desc->read.olinfo_status = 0;
4259 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4260 }
4261
4262 if (likely(!data_len))
4263 break;
4264
4265 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4266
4267 i++;
4268 tx_desc++;
4269 if (i == tx_ring->count) {
4270 tx_desc = IGB_TX_DESC(tx_ring, 0);
4271 i = 0;
4272 }
4273
4274 size = skb_frag_size(frag);
4275 data_len -= size;
4276
4277 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4278 size, DMA_TO_DEVICE);
4279 if (dma_mapping_error(tx_ring->dev, dma))
4280 goto dma_error;
4281
4282 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4283 tx_buffer_info->length = size;
4284 tx_buffer_info->dma = dma;
4285
4286 tx_desc->read.olinfo_status = 0;
4287 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4288
4289 frag++;
4290 }
4291
4292 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4293
4294 /* write last descriptor with RS and EOP bits */
4295 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4296 tx_desc->read.cmd_type_len = cmd_type;
4297
4298 /* set the timestamp */
4299 first->time_stamp = jiffies;
4300
4301 /*
4302 * Force memory writes to complete before letting h/w know there
4303 * are new descriptors to fetch. (Only applicable for weak-ordered
4304 * memory model archs, such as IA-64).
4305 *
4306 * We also need this memory barrier to make certain all of the
4307 * status bits have been updated before next_to_watch is written.
4308 */
4309 wmb();
4310
4311 /* set next_to_watch value indicating a packet is present */
4312 first->next_to_watch = tx_desc;
4313
4314 i++;
4315 if (i == tx_ring->count)
4316 i = 0;
4317
4318 tx_ring->next_to_use = i;
4319
4320 writel(i, tx_ring->tail);
4321
4322 /* we need this if more than one processor can write to our tail
4323 * at a time, it syncronizes IO on IA64/Altix systems */
4324 mmiowb();
4325
4326 return;
4327
4328 dma_error:
4329 dev_err(tx_ring->dev, "TX DMA map failed\n");
4330
4331 /* clear dma mappings for failed tx_buffer_info map */
4332 for (;;) {
4333 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4334 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4335 if (tx_buffer_info == first)
4336 break;
4337 if (i == 0)
4338 i = tx_ring->count;
4339 i--;
4340 }
4341
4342 tx_ring->next_to_use = i;
4343 }
4344
4345 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4346 {
4347 struct net_device *netdev = tx_ring->netdev;
4348
4349 netif_stop_subqueue(netdev, tx_ring->queue_index);
4350
4351 /* Herbert's original patch had:
4352 * smp_mb__after_netif_stop_queue();
4353 * but since that doesn't exist yet, just open code it. */
4354 smp_mb();
4355
4356 /* We need to check again in a case another CPU has just
4357 * made room available. */
4358 if (igb_desc_unused(tx_ring) < size)
4359 return -EBUSY;
4360
4361 /* A reprieve! */
4362 netif_wake_subqueue(netdev, tx_ring->queue_index);
4363
4364 u64_stats_update_begin(&tx_ring->tx_syncp2);
4365 tx_ring->tx_stats.restart_queue2++;
4366 u64_stats_update_end(&tx_ring->tx_syncp2);
4367
4368 return 0;
4369 }
4370
4371 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4372 {
4373 if (igb_desc_unused(tx_ring) >= size)
4374 return 0;
4375 return __igb_maybe_stop_tx(tx_ring, size);
4376 }
4377
4378 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4379 struct igb_ring *tx_ring)
4380 {
4381 struct igb_tx_buffer *first;
4382 int tso;
4383 u32 tx_flags = 0;
4384 __be16 protocol = vlan_get_protocol(skb);
4385 u8 hdr_len = 0;
4386
4387 /* need: 1 descriptor per page,
4388 * + 2 desc gap to keep tail from touching head,
4389 * + 1 desc for skb->data,
4390 * + 1 desc for context descriptor,
4391 * otherwise try next time */
4392 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4393 /* this is a hard error */
4394 return NETDEV_TX_BUSY;
4395 }
4396
4397 /* record the location of the first descriptor for this packet */
4398 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4399 first->skb = skb;
4400 first->bytecount = skb->len;
4401 first->gso_segs = 1;
4402
4403 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4404 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4405 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4406 }
4407
4408 if (vlan_tx_tag_present(skb)) {
4409 tx_flags |= IGB_TX_FLAGS_VLAN;
4410 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4411 }
4412
4413 /* record initial flags and protocol */
4414 first->tx_flags = tx_flags;
4415 first->protocol = protocol;
4416
4417 tso = igb_tso(tx_ring, first, &hdr_len);
4418 if (tso < 0)
4419 goto out_drop;
4420 else if (!tso)
4421 igb_tx_csum(tx_ring, first);
4422
4423 igb_tx_map(tx_ring, first, hdr_len);
4424
4425 /* Make sure there is space in the ring for the next send. */
4426 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4427
4428 return NETDEV_TX_OK;
4429
4430 out_drop:
4431 igb_unmap_and_free_tx_resource(tx_ring, first);
4432
4433 return NETDEV_TX_OK;
4434 }
4435
4436 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4437 struct sk_buff *skb)
4438 {
4439 unsigned int r_idx = skb->queue_mapping;
4440
4441 if (r_idx >= adapter->num_tx_queues)
4442 r_idx = r_idx % adapter->num_tx_queues;
4443
4444 return adapter->tx_ring[r_idx];
4445 }
4446
4447 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4448 struct net_device *netdev)
4449 {
4450 struct igb_adapter *adapter = netdev_priv(netdev);
4451
4452 if (test_bit(__IGB_DOWN, &adapter->state)) {
4453 dev_kfree_skb_any(skb);
4454 return NETDEV_TX_OK;
4455 }
4456
4457 if (skb->len <= 0) {
4458 dev_kfree_skb_any(skb);
4459 return NETDEV_TX_OK;
4460 }
4461
4462 /*
4463 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4464 * in order to meet this minimum size requirement.
4465 */
4466 if (skb->len < 17) {
4467 if (skb_padto(skb, 17))
4468 return NETDEV_TX_OK;
4469 skb->len = 17;
4470 }
4471
4472 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4473 }
4474
4475 /**
4476 * igb_tx_timeout - Respond to a Tx Hang
4477 * @netdev: network interface device structure
4478 **/
4479 static void igb_tx_timeout(struct net_device *netdev)
4480 {
4481 struct igb_adapter *adapter = netdev_priv(netdev);
4482 struct e1000_hw *hw = &adapter->hw;
4483
4484 /* Do the reset outside of interrupt context */
4485 adapter->tx_timeout_count++;
4486
4487 if (hw->mac.type >= e1000_82580)
4488 hw->dev_spec._82575.global_device_reset = true;
4489
4490 schedule_work(&adapter->reset_task);
4491 wr32(E1000_EICS,
4492 (adapter->eims_enable_mask & ~adapter->eims_other));
4493 }
4494
4495 static void igb_reset_task(struct work_struct *work)
4496 {
4497 struct igb_adapter *adapter;
4498 adapter = container_of(work, struct igb_adapter, reset_task);
4499
4500 igb_dump(adapter);
4501 netdev_err(adapter->netdev, "Reset adapter\n");
4502 igb_reinit_locked(adapter);
4503 }
4504
4505 /**
4506 * igb_get_stats64 - Get System Network Statistics
4507 * @netdev: network interface device structure
4508 * @stats: rtnl_link_stats64 pointer
4509 *
4510 **/
4511 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4512 struct rtnl_link_stats64 *stats)
4513 {
4514 struct igb_adapter *adapter = netdev_priv(netdev);
4515
4516 spin_lock(&adapter->stats64_lock);
4517 igb_update_stats(adapter, &adapter->stats64);
4518 memcpy(stats, &adapter->stats64, sizeof(*stats));
4519 spin_unlock(&adapter->stats64_lock);
4520
4521 return stats;
4522 }
4523
4524 /**
4525 * igb_change_mtu - Change the Maximum Transfer Unit
4526 * @netdev: network interface device structure
4527 * @new_mtu: new value for maximum frame size
4528 *
4529 * Returns 0 on success, negative on failure
4530 **/
4531 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4532 {
4533 struct igb_adapter *adapter = netdev_priv(netdev);
4534 struct pci_dev *pdev = adapter->pdev;
4535 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4536
4537 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4538 dev_err(&pdev->dev, "Invalid MTU setting\n");
4539 return -EINVAL;
4540 }
4541
4542 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4543 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4544 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4545 return -EINVAL;
4546 }
4547
4548 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4549 msleep(1);
4550
4551 /* igb_down has a dependency on max_frame_size */
4552 adapter->max_frame_size = max_frame;
4553
4554 if (netif_running(netdev))
4555 igb_down(adapter);
4556
4557 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4558 netdev->mtu, new_mtu);
4559 netdev->mtu = new_mtu;
4560
4561 if (netif_running(netdev))
4562 igb_up(adapter);
4563 else
4564 igb_reset(adapter);
4565
4566 clear_bit(__IGB_RESETTING, &adapter->state);
4567
4568 return 0;
4569 }
4570
4571 /**
4572 * igb_update_stats - Update the board statistics counters
4573 * @adapter: board private structure
4574 **/
4575
4576 void igb_update_stats(struct igb_adapter *adapter,
4577 struct rtnl_link_stats64 *net_stats)
4578 {
4579 struct e1000_hw *hw = &adapter->hw;
4580 struct pci_dev *pdev = adapter->pdev;
4581 u32 reg, mpc;
4582 u16 phy_tmp;
4583 int i;
4584 u64 bytes, packets;
4585 unsigned int start;
4586 u64 _bytes, _packets;
4587
4588 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4589
4590 /*
4591 * Prevent stats update while adapter is being reset, or if the pci
4592 * connection is down.
4593 */
4594 if (adapter->link_speed == 0)
4595 return;
4596 if (pci_channel_offline(pdev))
4597 return;
4598
4599 bytes = 0;
4600 packets = 0;
4601 for (i = 0; i < adapter->num_rx_queues; i++) {
4602 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4603 struct igb_ring *ring = adapter->rx_ring[i];
4604
4605 ring->rx_stats.drops += rqdpc_tmp;
4606 net_stats->rx_fifo_errors += rqdpc_tmp;
4607
4608 do {
4609 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4610 _bytes = ring->rx_stats.bytes;
4611 _packets = ring->rx_stats.packets;
4612 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4613 bytes += _bytes;
4614 packets += _packets;
4615 }
4616
4617 net_stats->rx_bytes = bytes;
4618 net_stats->rx_packets = packets;
4619
4620 bytes = 0;
4621 packets = 0;
4622 for (i = 0; i < adapter->num_tx_queues; i++) {
4623 struct igb_ring *ring = adapter->tx_ring[i];
4624 do {
4625 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4626 _bytes = ring->tx_stats.bytes;
4627 _packets = ring->tx_stats.packets;
4628 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4629 bytes += _bytes;
4630 packets += _packets;
4631 }
4632 net_stats->tx_bytes = bytes;
4633 net_stats->tx_packets = packets;
4634
4635 /* read stats registers */
4636 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4637 adapter->stats.gprc += rd32(E1000_GPRC);
4638 adapter->stats.gorc += rd32(E1000_GORCL);
4639 rd32(E1000_GORCH); /* clear GORCL */
4640 adapter->stats.bprc += rd32(E1000_BPRC);
4641 adapter->stats.mprc += rd32(E1000_MPRC);
4642 adapter->stats.roc += rd32(E1000_ROC);
4643
4644 adapter->stats.prc64 += rd32(E1000_PRC64);
4645 adapter->stats.prc127 += rd32(E1000_PRC127);
4646 adapter->stats.prc255 += rd32(E1000_PRC255);
4647 adapter->stats.prc511 += rd32(E1000_PRC511);
4648 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4649 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4650 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4651 adapter->stats.sec += rd32(E1000_SEC);
4652
4653 mpc = rd32(E1000_MPC);
4654 adapter->stats.mpc += mpc;
4655 net_stats->rx_fifo_errors += mpc;
4656 adapter->stats.scc += rd32(E1000_SCC);
4657 adapter->stats.ecol += rd32(E1000_ECOL);
4658 adapter->stats.mcc += rd32(E1000_MCC);
4659 adapter->stats.latecol += rd32(E1000_LATECOL);
4660 adapter->stats.dc += rd32(E1000_DC);
4661 adapter->stats.rlec += rd32(E1000_RLEC);
4662 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4663 adapter->stats.xontxc += rd32(E1000_XONTXC);
4664 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4665 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4666 adapter->stats.fcruc += rd32(E1000_FCRUC);
4667 adapter->stats.gptc += rd32(E1000_GPTC);
4668 adapter->stats.gotc += rd32(E1000_GOTCL);
4669 rd32(E1000_GOTCH); /* clear GOTCL */
4670 adapter->stats.rnbc += rd32(E1000_RNBC);
4671 adapter->stats.ruc += rd32(E1000_RUC);
4672 adapter->stats.rfc += rd32(E1000_RFC);
4673 adapter->stats.rjc += rd32(E1000_RJC);
4674 adapter->stats.tor += rd32(E1000_TORH);
4675 adapter->stats.tot += rd32(E1000_TOTH);
4676 adapter->stats.tpr += rd32(E1000_TPR);
4677
4678 adapter->stats.ptc64 += rd32(E1000_PTC64);
4679 adapter->stats.ptc127 += rd32(E1000_PTC127);
4680 adapter->stats.ptc255 += rd32(E1000_PTC255);
4681 adapter->stats.ptc511 += rd32(E1000_PTC511);
4682 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4683 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4684
4685 adapter->stats.mptc += rd32(E1000_MPTC);
4686 adapter->stats.bptc += rd32(E1000_BPTC);
4687
4688 adapter->stats.tpt += rd32(E1000_TPT);
4689 adapter->stats.colc += rd32(E1000_COLC);
4690
4691 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4692 /* read internal phy specific stats */
4693 reg = rd32(E1000_CTRL_EXT);
4694 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4695 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4696 adapter->stats.tncrs += rd32(E1000_TNCRS);
4697 }
4698
4699 adapter->stats.tsctc += rd32(E1000_TSCTC);
4700 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4701
4702 adapter->stats.iac += rd32(E1000_IAC);
4703 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4704 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4705 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4706 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4707 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4708 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4709 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4710 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4711
4712 /* Fill out the OS statistics structure */
4713 net_stats->multicast = adapter->stats.mprc;
4714 net_stats->collisions = adapter->stats.colc;
4715
4716 /* Rx Errors */
4717
4718 /* RLEC on some newer hardware can be incorrect so build
4719 * our own version based on RUC and ROC */
4720 net_stats->rx_errors = adapter->stats.rxerrc +
4721 adapter->stats.crcerrs + adapter->stats.algnerrc +
4722 adapter->stats.ruc + adapter->stats.roc +
4723 adapter->stats.cexterr;
4724 net_stats->rx_length_errors = adapter->stats.ruc +
4725 adapter->stats.roc;
4726 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4727 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4728 net_stats->rx_missed_errors = adapter->stats.mpc;
4729
4730 /* Tx Errors */
4731 net_stats->tx_errors = adapter->stats.ecol +
4732 adapter->stats.latecol;
4733 net_stats->tx_aborted_errors = adapter->stats.ecol;
4734 net_stats->tx_window_errors = adapter->stats.latecol;
4735 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4736
4737 /* Tx Dropped needs to be maintained elsewhere */
4738
4739 /* Phy Stats */
4740 if (hw->phy.media_type == e1000_media_type_copper) {
4741 if ((adapter->link_speed == SPEED_1000) &&
4742 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4743 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4744 adapter->phy_stats.idle_errors += phy_tmp;
4745 }
4746 }
4747
4748 /* Management Stats */
4749 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4750 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4751 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4752
4753 /* OS2BMC Stats */
4754 reg = rd32(E1000_MANC);
4755 if (reg & E1000_MANC_EN_BMC2OS) {
4756 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4757 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4758 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4759 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4760 }
4761 }
4762
4763 static irqreturn_t igb_msix_other(int irq, void *data)
4764 {
4765 struct igb_adapter *adapter = data;
4766 struct e1000_hw *hw = &adapter->hw;
4767 u32 icr = rd32(E1000_ICR);
4768 /* reading ICR causes bit 31 of EICR to be cleared */
4769
4770 if (icr & E1000_ICR_DRSTA)
4771 schedule_work(&adapter->reset_task);
4772
4773 if (icr & E1000_ICR_DOUTSYNC) {
4774 /* HW is reporting DMA is out of sync */
4775 adapter->stats.doosync++;
4776 /* The DMA Out of Sync is also indication of a spoof event
4777 * in IOV mode. Check the Wrong VM Behavior register to
4778 * see if it is really a spoof event. */
4779 igb_check_wvbr(adapter);
4780 }
4781
4782 /* Check for a mailbox event */
4783 if (icr & E1000_ICR_VMMB)
4784 igb_msg_task(adapter);
4785
4786 if (icr & E1000_ICR_LSC) {
4787 hw->mac.get_link_status = 1;
4788 /* guard against interrupt when we're going down */
4789 if (!test_bit(__IGB_DOWN, &adapter->state))
4790 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4791 }
4792
4793 wr32(E1000_EIMS, adapter->eims_other);
4794
4795 return IRQ_HANDLED;
4796 }
4797
4798 static void igb_write_itr(struct igb_q_vector *q_vector)
4799 {
4800 struct igb_adapter *adapter = q_vector->adapter;
4801 u32 itr_val = q_vector->itr_val & 0x7FFC;
4802
4803 if (!q_vector->set_itr)
4804 return;
4805
4806 if (!itr_val)
4807 itr_val = 0x4;
4808
4809 if (adapter->hw.mac.type == e1000_82575)
4810 itr_val |= itr_val << 16;
4811 else
4812 itr_val |= E1000_EITR_CNT_IGNR;
4813
4814 writel(itr_val, q_vector->itr_register);
4815 q_vector->set_itr = 0;
4816 }
4817
4818 static irqreturn_t igb_msix_ring(int irq, void *data)
4819 {
4820 struct igb_q_vector *q_vector = data;
4821
4822 /* Write the ITR value calculated from the previous interrupt. */
4823 igb_write_itr(q_vector);
4824
4825 napi_schedule(&q_vector->napi);
4826
4827 return IRQ_HANDLED;
4828 }
4829
4830 #ifdef CONFIG_IGB_DCA
4831 static void igb_update_dca(struct igb_q_vector *q_vector)
4832 {
4833 struct igb_adapter *adapter = q_vector->adapter;
4834 struct e1000_hw *hw = &adapter->hw;
4835 int cpu = get_cpu();
4836
4837 if (q_vector->cpu == cpu)
4838 goto out_no_update;
4839
4840 if (q_vector->tx.ring) {
4841 int q = q_vector->tx.ring->reg_idx;
4842 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4843 if (hw->mac.type == e1000_82575) {
4844 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4845 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4846 } else {
4847 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4848 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4849 E1000_DCA_TXCTRL_CPUID_SHIFT;
4850 }
4851 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4852 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4853 }
4854 if (q_vector->rx.ring) {
4855 int q = q_vector->rx.ring->reg_idx;
4856 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4857 if (hw->mac.type == e1000_82575) {
4858 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4859 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4860 } else {
4861 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4862 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4863 E1000_DCA_RXCTRL_CPUID_SHIFT;
4864 }
4865 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4866 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4867 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4868 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4869 }
4870 q_vector->cpu = cpu;
4871 out_no_update:
4872 put_cpu();
4873 }
4874
4875 static void igb_setup_dca(struct igb_adapter *adapter)
4876 {
4877 struct e1000_hw *hw = &adapter->hw;
4878 int i;
4879
4880 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4881 return;
4882
4883 /* Always use CB2 mode, difference is masked in the CB driver. */
4884 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4885
4886 for (i = 0; i < adapter->num_q_vectors; i++) {
4887 adapter->q_vector[i]->cpu = -1;
4888 igb_update_dca(adapter->q_vector[i]);
4889 }
4890 }
4891
4892 static int __igb_notify_dca(struct device *dev, void *data)
4893 {
4894 struct net_device *netdev = dev_get_drvdata(dev);
4895 struct igb_adapter *adapter = netdev_priv(netdev);
4896 struct pci_dev *pdev = adapter->pdev;
4897 struct e1000_hw *hw = &adapter->hw;
4898 unsigned long event = *(unsigned long *)data;
4899
4900 switch (event) {
4901 case DCA_PROVIDER_ADD:
4902 /* if already enabled, don't do it again */
4903 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4904 break;
4905 if (dca_add_requester(dev) == 0) {
4906 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4907 dev_info(&pdev->dev, "DCA enabled\n");
4908 igb_setup_dca(adapter);
4909 break;
4910 }
4911 /* Fall Through since DCA is disabled. */
4912 case DCA_PROVIDER_REMOVE:
4913 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4914 /* without this a class_device is left
4915 * hanging around in the sysfs model */
4916 dca_remove_requester(dev);
4917 dev_info(&pdev->dev, "DCA disabled\n");
4918 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4919 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4920 }
4921 break;
4922 }
4923
4924 return 0;
4925 }
4926
4927 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4928 void *p)
4929 {
4930 int ret_val;
4931
4932 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4933 __igb_notify_dca);
4934
4935 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4936 }
4937 #endif /* CONFIG_IGB_DCA */
4938
4939 #ifdef CONFIG_PCI_IOV
4940 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4941 {
4942 unsigned char mac_addr[ETH_ALEN];
4943 struct pci_dev *pdev = adapter->pdev;
4944 struct e1000_hw *hw = &adapter->hw;
4945 struct pci_dev *pvfdev;
4946 unsigned int device_id;
4947 u16 thisvf_devfn;
4948
4949 random_ether_addr(mac_addr);
4950 igb_set_vf_mac(adapter, vf, mac_addr);
4951
4952 switch (adapter->hw.mac.type) {
4953 case e1000_82576:
4954 device_id = IGB_82576_VF_DEV_ID;
4955 /* VF Stride for 82576 is 2 */
4956 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4957 (pdev->devfn & 1);
4958 break;
4959 case e1000_i350:
4960 device_id = IGB_I350_VF_DEV_ID;
4961 /* VF Stride for I350 is 4 */
4962 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4963 (pdev->devfn & 3);
4964 break;
4965 default:
4966 device_id = 0;
4967 thisvf_devfn = 0;
4968 break;
4969 }
4970
4971 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4972 while (pvfdev) {
4973 if (pvfdev->devfn == thisvf_devfn)
4974 break;
4975 pvfdev = pci_get_device(hw->vendor_id,
4976 device_id, pvfdev);
4977 }
4978
4979 if (pvfdev)
4980 adapter->vf_data[vf].vfdev = pvfdev;
4981 else
4982 dev_err(&pdev->dev,
4983 "Couldn't find pci dev ptr for VF %4.4x\n",
4984 thisvf_devfn);
4985 return pvfdev != NULL;
4986 }
4987
4988 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4989 {
4990 struct e1000_hw *hw = &adapter->hw;
4991 struct pci_dev *pdev = adapter->pdev;
4992 struct pci_dev *pvfdev;
4993 u16 vf_devfn = 0;
4994 u16 vf_stride;
4995 unsigned int device_id;
4996 int vfs_found = 0;
4997
4998 switch (adapter->hw.mac.type) {
4999 case e1000_82576:
5000 device_id = IGB_82576_VF_DEV_ID;
5001 /* VF Stride for 82576 is 2 */
5002 vf_stride = 2;
5003 break;
5004 case e1000_i350:
5005 device_id = IGB_I350_VF_DEV_ID;
5006 /* VF Stride for I350 is 4 */
5007 vf_stride = 4;
5008 break;
5009 default:
5010 device_id = 0;
5011 vf_stride = 0;
5012 break;
5013 }
5014
5015 vf_devfn = pdev->devfn + 0x80;
5016 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5017 while (pvfdev) {
5018 if (pvfdev->devfn == vf_devfn &&
5019 (pvfdev->bus->number >= pdev->bus->number))
5020 vfs_found++;
5021 vf_devfn += vf_stride;
5022 pvfdev = pci_get_device(hw->vendor_id,
5023 device_id, pvfdev);
5024 }
5025
5026 return vfs_found;
5027 }
5028
5029 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5030 {
5031 int i;
5032 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5033 if (adapter->vf_data[i].vfdev) {
5034 if (adapter->vf_data[i].vfdev->dev_flags &
5035 PCI_DEV_FLAGS_ASSIGNED)
5036 return true;
5037 }
5038 }
5039 return false;
5040 }
5041
5042 #endif
5043 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5044 {
5045 struct e1000_hw *hw = &adapter->hw;
5046 u32 ping;
5047 int i;
5048
5049 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5050 ping = E1000_PF_CONTROL_MSG;
5051 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5052 ping |= E1000_VT_MSGTYPE_CTS;
5053 igb_write_mbx(hw, &ping, 1, i);
5054 }
5055 }
5056
5057 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5058 {
5059 struct e1000_hw *hw = &adapter->hw;
5060 u32 vmolr = rd32(E1000_VMOLR(vf));
5061 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5062
5063 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5064 IGB_VF_FLAG_MULTI_PROMISC);
5065 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5066
5067 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5068 vmolr |= E1000_VMOLR_MPME;
5069 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5070 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5071 } else {
5072 /*
5073 * if we have hashes and we are clearing a multicast promisc
5074 * flag we need to write the hashes to the MTA as this step
5075 * was previously skipped
5076 */
5077 if (vf_data->num_vf_mc_hashes > 30) {
5078 vmolr |= E1000_VMOLR_MPME;
5079 } else if (vf_data->num_vf_mc_hashes) {
5080 int j;
5081 vmolr |= E1000_VMOLR_ROMPE;
5082 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5083 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5084 }
5085 }
5086
5087 wr32(E1000_VMOLR(vf), vmolr);
5088
5089 /* there are flags left unprocessed, likely not supported */
5090 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5091 return -EINVAL;
5092
5093 return 0;
5094
5095 }
5096
5097 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5098 u32 *msgbuf, u32 vf)
5099 {
5100 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5101 u16 *hash_list = (u16 *)&msgbuf[1];
5102 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5103 int i;
5104
5105 /* salt away the number of multicast addresses assigned
5106 * to this VF for later use to restore when the PF multi cast
5107 * list changes
5108 */
5109 vf_data->num_vf_mc_hashes = n;
5110
5111 /* only up to 30 hash values supported */
5112 if (n > 30)
5113 n = 30;
5114
5115 /* store the hashes for later use */
5116 for (i = 0; i < n; i++)
5117 vf_data->vf_mc_hashes[i] = hash_list[i];
5118
5119 /* Flush and reset the mta with the new values */
5120 igb_set_rx_mode(adapter->netdev);
5121
5122 return 0;
5123 }
5124
5125 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5126 {
5127 struct e1000_hw *hw = &adapter->hw;
5128 struct vf_data_storage *vf_data;
5129 int i, j;
5130
5131 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5132 u32 vmolr = rd32(E1000_VMOLR(i));
5133 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5134
5135 vf_data = &adapter->vf_data[i];
5136
5137 if ((vf_data->num_vf_mc_hashes > 30) ||
5138 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5139 vmolr |= E1000_VMOLR_MPME;
5140 } else if (vf_data->num_vf_mc_hashes) {
5141 vmolr |= E1000_VMOLR_ROMPE;
5142 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5143 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5144 }
5145 wr32(E1000_VMOLR(i), vmolr);
5146 }
5147 }
5148
5149 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5150 {
5151 struct e1000_hw *hw = &adapter->hw;
5152 u32 pool_mask, reg, vid;
5153 int i;
5154
5155 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5156
5157 /* Find the vlan filter for this id */
5158 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5159 reg = rd32(E1000_VLVF(i));
5160
5161 /* remove the vf from the pool */
5162 reg &= ~pool_mask;
5163
5164 /* if pool is empty then remove entry from vfta */
5165 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5166 (reg & E1000_VLVF_VLANID_ENABLE)) {
5167 reg = 0;
5168 vid = reg & E1000_VLVF_VLANID_MASK;
5169 igb_vfta_set(hw, vid, false);
5170 }
5171
5172 wr32(E1000_VLVF(i), reg);
5173 }
5174
5175 adapter->vf_data[vf].vlans_enabled = 0;
5176 }
5177
5178 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5179 {
5180 struct e1000_hw *hw = &adapter->hw;
5181 u32 reg, i;
5182
5183 /* The vlvf table only exists on 82576 hardware and newer */
5184 if (hw->mac.type < e1000_82576)
5185 return -1;
5186
5187 /* we only need to do this if VMDq is enabled */
5188 if (!adapter->vfs_allocated_count)
5189 return -1;
5190
5191 /* Find the vlan filter for this id */
5192 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5193 reg = rd32(E1000_VLVF(i));
5194 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5195 vid == (reg & E1000_VLVF_VLANID_MASK))
5196 break;
5197 }
5198
5199 if (add) {
5200 if (i == E1000_VLVF_ARRAY_SIZE) {
5201 /* Did not find a matching VLAN ID entry that was
5202 * enabled. Search for a free filter entry, i.e.
5203 * one without the enable bit set
5204 */
5205 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5206 reg = rd32(E1000_VLVF(i));
5207 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5208 break;
5209 }
5210 }
5211 if (i < E1000_VLVF_ARRAY_SIZE) {
5212 /* Found an enabled/available entry */
5213 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5214
5215 /* if !enabled we need to set this up in vfta */
5216 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5217 /* add VID to filter table */
5218 igb_vfta_set(hw, vid, true);
5219 reg |= E1000_VLVF_VLANID_ENABLE;
5220 }
5221 reg &= ~E1000_VLVF_VLANID_MASK;
5222 reg |= vid;
5223 wr32(E1000_VLVF(i), reg);
5224
5225 /* do not modify RLPML for PF devices */
5226 if (vf >= adapter->vfs_allocated_count)
5227 return 0;
5228
5229 if (!adapter->vf_data[vf].vlans_enabled) {
5230 u32 size;
5231 reg = rd32(E1000_VMOLR(vf));
5232 size = reg & E1000_VMOLR_RLPML_MASK;
5233 size += 4;
5234 reg &= ~E1000_VMOLR_RLPML_MASK;
5235 reg |= size;
5236 wr32(E1000_VMOLR(vf), reg);
5237 }
5238
5239 adapter->vf_data[vf].vlans_enabled++;
5240 }
5241 } else {
5242 if (i < E1000_VLVF_ARRAY_SIZE) {
5243 /* remove vf from the pool */
5244 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5245 /* if pool is empty then remove entry from vfta */
5246 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5247 reg = 0;
5248 igb_vfta_set(hw, vid, false);
5249 }
5250 wr32(E1000_VLVF(i), reg);
5251
5252 /* do not modify RLPML for PF devices */
5253 if (vf >= adapter->vfs_allocated_count)
5254 return 0;
5255
5256 adapter->vf_data[vf].vlans_enabled--;
5257 if (!adapter->vf_data[vf].vlans_enabled) {
5258 u32 size;
5259 reg = rd32(E1000_VMOLR(vf));
5260 size = reg & E1000_VMOLR_RLPML_MASK;
5261 size -= 4;
5262 reg &= ~E1000_VMOLR_RLPML_MASK;
5263 reg |= size;
5264 wr32(E1000_VMOLR(vf), reg);
5265 }
5266 }
5267 }
5268 return 0;
5269 }
5270
5271 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5272 {
5273 struct e1000_hw *hw = &adapter->hw;
5274
5275 if (vid)
5276 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5277 else
5278 wr32(E1000_VMVIR(vf), 0);
5279 }
5280
5281 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5282 int vf, u16 vlan, u8 qos)
5283 {
5284 int err = 0;
5285 struct igb_adapter *adapter = netdev_priv(netdev);
5286
5287 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5288 return -EINVAL;
5289 if (vlan || qos) {
5290 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5291 if (err)
5292 goto out;
5293 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5294 igb_set_vmolr(adapter, vf, !vlan);
5295 adapter->vf_data[vf].pf_vlan = vlan;
5296 adapter->vf_data[vf].pf_qos = qos;
5297 dev_info(&adapter->pdev->dev,
5298 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5299 if (test_bit(__IGB_DOWN, &adapter->state)) {
5300 dev_warn(&adapter->pdev->dev,
5301 "The VF VLAN has been set,"
5302 " but the PF device is not up.\n");
5303 dev_warn(&adapter->pdev->dev,
5304 "Bring the PF device up before"
5305 " attempting to use the VF device.\n");
5306 }
5307 } else {
5308 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5309 false, vf);
5310 igb_set_vmvir(adapter, vlan, vf);
5311 igb_set_vmolr(adapter, vf, true);
5312 adapter->vf_data[vf].pf_vlan = 0;
5313 adapter->vf_data[vf].pf_qos = 0;
5314 }
5315 out:
5316 return err;
5317 }
5318
5319 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5320 {
5321 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5322 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5323
5324 return igb_vlvf_set(adapter, vid, add, vf);
5325 }
5326
5327 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5328 {
5329 /* clear flags - except flag that indicates PF has set the MAC */
5330 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5331 adapter->vf_data[vf].last_nack = jiffies;
5332
5333 /* reset offloads to defaults */
5334 igb_set_vmolr(adapter, vf, true);
5335
5336 /* reset vlans for device */
5337 igb_clear_vf_vfta(adapter, vf);
5338 if (adapter->vf_data[vf].pf_vlan)
5339 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5340 adapter->vf_data[vf].pf_vlan,
5341 adapter->vf_data[vf].pf_qos);
5342 else
5343 igb_clear_vf_vfta(adapter, vf);
5344
5345 /* reset multicast table array for vf */
5346 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5347
5348 /* Flush and reset the mta with the new values */
5349 igb_set_rx_mode(adapter->netdev);
5350 }
5351
5352 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5353 {
5354 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5355
5356 /* generate a new mac address as we were hotplug removed/added */
5357 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5358 random_ether_addr(vf_mac);
5359
5360 /* process remaining reset events */
5361 igb_vf_reset(adapter, vf);
5362 }
5363
5364 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5365 {
5366 struct e1000_hw *hw = &adapter->hw;
5367 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5368 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5369 u32 reg, msgbuf[3];
5370 u8 *addr = (u8 *)(&msgbuf[1]);
5371
5372 /* process all the same items cleared in a function level reset */
5373 igb_vf_reset(adapter, vf);
5374
5375 /* set vf mac address */
5376 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5377
5378 /* enable transmit and receive for vf */
5379 reg = rd32(E1000_VFTE);
5380 wr32(E1000_VFTE, reg | (1 << vf));
5381 reg = rd32(E1000_VFRE);
5382 wr32(E1000_VFRE, reg | (1 << vf));
5383
5384 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5385
5386 /* reply to reset with ack and vf mac address */
5387 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5388 memcpy(addr, vf_mac, 6);
5389 igb_write_mbx(hw, msgbuf, 3, vf);
5390 }
5391
5392 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5393 {
5394 /*
5395 * The VF MAC Address is stored in a packed array of bytes
5396 * starting at the second 32 bit word of the msg array
5397 */
5398 unsigned char *addr = (char *)&msg[1];
5399 int err = -1;
5400
5401 if (is_valid_ether_addr(addr))
5402 err = igb_set_vf_mac(adapter, vf, addr);
5403
5404 return err;
5405 }
5406
5407 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5408 {
5409 struct e1000_hw *hw = &adapter->hw;
5410 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5411 u32 msg = E1000_VT_MSGTYPE_NACK;
5412
5413 /* if device isn't clear to send it shouldn't be reading either */
5414 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5415 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5416 igb_write_mbx(hw, &msg, 1, vf);
5417 vf_data->last_nack = jiffies;
5418 }
5419 }
5420
5421 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5422 {
5423 struct pci_dev *pdev = adapter->pdev;
5424 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5425 struct e1000_hw *hw = &adapter->hw;
5426 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5427 s32 retval;
5428
5429 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5430
5431 if (retval) {
5432 /* if receive failed revoke VF CTS stats and restart init */
5433 dev_err(&pdev->dev, "Error receiving message from VF\n");
5434 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5435 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5436 return;
5437 goto out;
5438 }
5439
5440 /* this is a message we already processed, do nothing */
5441 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5442 return;
5443
5444 /*
5445 * until the vf completes a reset it should not be
5446 * allowed to start any configuration.
5447 */
5448
5449 if (msgbuf[0] == E1000_VF_RESET) {
5450 igb_vf_reset_msg(adapter, vf);
5451 return;
5452 }
5453
5454 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5455 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5456 return;
5457 retval = -1;
5458 goto out;
5459 }
5460
5461 switch ((msgbuf[0] & 0xFFFF)) {
5462 case E1000_VF_SET_MAC_ADDR:
5463 retval = -EINVAL;
5464 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5465 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5466 else
5467 dev_warn(&pdev->dev,
5468 "VF %d attempted to override administratively "
5469 "set MAC address\nReload the VF driver to "
5470 "resume operations\n", vf);
5471 break;
5472 case E1000_VF_SET_PROMISC:
5473 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5474 break;
5475 case E1000_VF_SET_MULTICAST:
5476 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5477 break;
5478 case E1000_VF_SET_LPE:
5479 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5480 break;
5481 case E1000_VF_SET_VLAN:
5482 retval = -1;
5483 if (vf_data->pf_vlan)
5484 dev_warn(&pdev->dev,
5485 "VF %d attempted to override administratively "
5486 "set VLAN tag\nReload the VF driver to "
5487 "resume operations\n", vf);
5488 else
5489 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5490 break;
5491 default:
5492 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5493 retval = -1;
5494 break;
5495 }
5496
5497 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5498 out:
5499 /* notify the VF of the results of what it sent us */
5500 if (retval)
5501 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5502 else
5503 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5504
5505 igb_write_mbx(hw, msgbuf, 1, vf);
5506 }
5507
5508 static void igb_msg_task(struct igb_adapter *adapter)
5509 {
5510 struct e1000_hw *hw = &adapter->hw;
5511 u32 vf;
5512
5513 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5514 /* process any reset requests */
5515 if (!igb_check_for_rst(hw, vf))
5516 igb_vf_reset_event(adapter, vf);
5517
5518 /* process any messages pending */
5519 if (!igb_check_for_msg(hw, vf))
5520 igb_rcv_msg_from_vf(adapter, vf);
5521
5522 /* process any acks */
5523 if (!igb_check_for_ack(hw, vf))
5524 igb_rcv_ack_from_vf(adapter, vf);
5525 }
5526 }
5527
5528 /**
5529 * igb_set_uta - Set unicast filter table address
5530 * @adapter: board private structure
5531 *
5532 * The unicast table address is a register array of 32-bit registers.
5533 * The table is meant to be used in a way similar to how the MTA is used
5534 * however due to certain limitations in the hardware it is necessary to
5535 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5536 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5537 **/
5538 static void igb_set_uta(struct igb_adapter *adapter)
5539 {
5540 struct e1000_hw *hw = &adapter->hw;
5541 int i;
5542
5543 /* The UTA table only exists on 82576 hardware and newer */
5544 if (hw->mac.type < e1000_82576)
5545 return;
5546
5547 /* we only need to do this if VMDq is enabled */
5548 if (!adapter->vfs_allocated_count)
5549 return;
5550
5551 for (i = 0; i < hw->mac.uta_reg_count; i++)
5552 array_wr32(E1000_UTA, i, ~0);
5553 }
5554
5555 /**
5556 * igb_intr_msi - Interrupt Handler
5557 * @irq: interrupt number
5558 * @data: pointer to a network interface device structure
5559 **/
5560 static irqreturn_t igb_intr_msi(int irq, void *data)
5561 {
5562 struct igb_adapter *adapter = data;
5563 struct igb_q_vector *q_vector = adapter->q_vector[0];
5564 struct e1000_hw *hw = &adapter->hw;
5565 /* read ICR disables interrupts using IAM */
5566 u32 icr = rd32(E1000_ICR);
5567
5568 igb_write_itr(q_vector);
5569
5570 if (icr & E1000_ICR_DRSTA)
5571 schedule_work(&adapter->reset_task);
5572
5573 if (icr & E1000_ICR_DOUTSYNC) {
5574 /* HW is reporting DMA is out of sync */
5575 adapter->stats.doosync++;
5576 }
5577
5578 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5579 hw->mac.get_link_status = 1;
5580 if (!test_bit(__IGB_DOWN, &adapter->state))
5581 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5582 }
5583
5584 napi_schedule(&q_vector->napi);
5585
5586 return IRQ_HANDLED;
5587 }
5588
5589 /**
5590 * igb_intr - Legacy Interrupt Handler
5591 * @irq: interrupt number
5592 * @data: pointer to a network interface device structure
5593 **/
5594 static irqreturn_t igb_intr(int irq, void *data)
5595 {
5596 struct igb_adapter *adapter = data;
5597 struct igb_q_vector *q_vector = adapter->q_vector[0];
5598 struct e1000_hw *hw = &adapter->hw;
5599 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5600 * need for the IMC write */
5601 u32 icr = rd32(E1000_ICR);
5602
5603 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5604 * not set, then the adapter didn't send an interrupt */
5605 if (!(icr & E1000_ICR_INT_ASSERTED))
5606 return IRQ_NONE;
5607
5608 igb_write_itr(q_vector);
5609
5610 if (icr & E1000_ICR_DRSTA)
5611 schedule_work(&adapter->reset_task);
5612
5613 if (icr & E1000_ICR_DOUTSYNC) {
5614 /* HW is reporting DMA is out of sync */
5615 adapter->stats.doosync++;
5616 }
5617
5618 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5619 hw->mac.get_link_status = 1;
5620 /* guard against interrupt when we're going down */
5621 if (!test_bit(__IGB_DOWN, &adapter->state))
5622 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5623 }
5624
5625 napi_schedule(&q_vector->napi);
5626
5627 return IRQ_HANDLED;
5628 }
5629
5630 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5631 {
5632 struct igb_adapter *adapter = q_vector->adapter;
5633 struct e1000_hw *hw = &adapter->hw;
5634
5635 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5636 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5637 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5638 igb_set_itr(q_vector);
5639 else
5640 igb_update_ring_itr(q_vector);
5641 }
5642
5643 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5644 if (adapter->msix_entries)
5645 wr32(E1000_EIMS, q_vector->eims_value);
5646 else
5647 igb_irq_enable(adapter);
5648 }
5649 }
5650
5651 /**
5652 * igb_poll - NAPI Rx polling callback
5653 * @napi: napi polling structure
5654 * @budget: count of how many packets we should handle
5655 **/
5656 static int igb_poll(struct napi_struct *napi, int budget)
5657 {
5658 struct igb_q_vector *q_vector = container_of(napi,
5659 struct igb_q_vector,
5660 napi);
5661 bool clean_complete = true;
5662
5663 #ifdef CONFIG_IGB_DCA
5664 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5665 igb_update_dca(q_vector);
5666 #endif
5667 if (q_vector->tx.ring)
5668 clean_complete = igb_clean_tx_irq(q_vector);
5669
5670 if (q_vector->rx.ring)
5671 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5672
5673 /* If all work not completed, return budget and keep polling */
5674 if (!clean_complete)
5675 return budget;
5676
5677 /* If not enough Rx work done, exit the polling mode */
5678 napi_complete(napi);
5679 igb_ring_irq_enable(q_vector);
5680
5681 return 0;
5682 }
5683
5684 /**
5685 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5686 * @adapter: board private structure
5687 * @shhwtstamps: timestamp structure to update
5688 * @regval: unsigned 64bit system time value.
5689 *
5690 * We need to convert the system time value stored in the RX/TXSTMP registers
5691 * into a hwtstamp which can be used by the upper level timestamping functions
5692 */
5693 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5694 struct skb_shared_hwtstamps *shhwtstamps,
5695 u64 regval)
5696 {
5697 u64 ns;
5698
5699 /*
5700 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5701 * 24 to match clock shift we setup earlier.
5702 */
5703 if (adapter->hw.mac.type >= e1000_82580)
5704 regval <<= IGB_82580_TSYNC_SHIFT;
5705
5706 ns = timecounter_cyc2time(&adapter->clock, regval);
5707 timecompare_update(&adapter->compare, ns);
5708 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5709 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5710 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5711 }
5712
5713 /**
5714 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5715 * @q_vector: pointer to q_vector containing needed info
5716 * @buffer: pointer to igb_tx_buffer structure
5717 *
5718 * If we were asked to do hardware stamping and such a time stamp is
5719 * available, then it must have been for this skb here because we only
5720 * allow only one such packet into the queue.
5721 */
5722 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5723 struct igb_tx_buffer *buffer_info)
5724 {
5725 struct igb_adapter *adapter = q_vector->adapter;
5726 struct e1000_hw *hw = &adapter->hw;
5727 struct skb_shared_hwtstamps shhwtstamps;
5728 u64 regval;
5729
5730 /* if skb does not support hw timestamp or TX stamp not valid exit */
5731 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5732 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5733 return;
5734
5735 regval = rd32(E1000_TXSTMPL);
5736 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5737
5738 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5739 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5740 }
5741
5742 /**
5743 * igb_clean_tx_irq - Reclaim resources after transmit completes
5744 * @q_vector: pointer to q_vector containing needed info
5745 * returns true if ring is completely cleaned
5746 **/
5747 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5748 {
5749 struct igb_adapter *adapter = q_vector->adapter;
5750 struct igb_ring *tx_ring = q_vector->tx.ring;
5751 struct igb_tx_buffer *tx_buffer;
5752 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5753 unsigned int total_bytes = 0, total_packets = 0;
5754 unsigned int budget = q_vector->tx.work_limit;
5755 unsigned int i = tx_ring->next_to_clean;
5756
5757 if (test_bit(__IGB_DOWN, &adapter->state))
5758 return true;
5759
5760 tx_buffer = &tx_ring->tx_buffer_info[i];
5761 tx_desc = IGB_TX_DESC(tx_ring, i);
5762 i -= tx_ring->count;
5763
5764 for (; budget; budget--) {
5765 eop_desc = tx_buffer->next_to_watch;
5766
5767 /* prevent any other reads prior to eop_desc */
5768 rmb();
5769
5770 /* if next_to_watch is not set then there is no work pending */
5771 if (!eop_desc)
5772 break;
5773
5774 /* if DD is not set pending work has not been completed */
5775 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5776 break;
5777
5778 /* clear next_to_watch to prevent false hangs */
5779 tx_buffer->next_to_watch = NULL;
5780
5781 /* update the statistics for this packet */
5782 total_bytes += tx_buffer->bytecount;
5783 total_packets += tx_buffer->gso_segs;
5784
5785 /* retrieve hardware timestamp */
5786 igb_tx_hwtstamp(q_vector, tx_buffer);
5787
5788 /* free the skb */
5789 dev_kfree_skb_any(tx_buffer->skb);
5790 tx_buffer->skb = NULL;
5791
5792 /* unmap skb header data */
5793 dma_unmap_single(tx_ring->dev,
5794 tx_buffer->dma,
5795 tx_buffer->length,
5796 DMA_TO_DEVICE);
5797
5798 /* clear last DMA location and unmap remaining buffers */
5799 while (tx_desc != eop_desc) {
5800 tx_buffer->dma = 0;
5801
5802 tx_buffer++;
5803 tx_desc++;
5804 i++;
5805 if (unlikely(!i)) {
5806 i -= tx_ring->count;
5807 tx_buffer = tx_ring->tx_buffer_info;
5808 tx_desc = IGB_TX_DESC(tx_ring, 0);
5809 }
5810
5811 /* unmap any remaining paged data */
5812 if (tx_buffer->dma) {
5813 dma_unmap_page(tx_ring->dev,
5814 tx_buffer->dma,
5815 tx_buffer->length,
5816 DMA_TO_DEVICE);
5817 }
5818 }
5819
5820 /* clear last DMA location */
5821 tx_buffer->dma = 0;
5822
5823 /* move us one more past the eop_desc for start of next pkt */
5824 tx_buffer++;
5825 tx_desc++;
5826 i++;
5827 if (unlikely(!i)) {
5828 i -= tx_ring->count;
5829 tx_buffer = tx_ring->tx_buffer_info;
5830 tx_desc = IGB_TX_DESC(tx_ring, 0);
5831 }
5832 }
5833
5834 netdev_tx_completed_queue(txring_txq(tx_ring),
5835 total_packets, total_bytes);
5836 i += tx_ring->count;
5837 tx_ring->next_to_clean = i;
5838 u64_stats_update_begin(&tx_ring->tx_syncp);
5839 tx_ring->tx_stats.bytes += total_bytes;
5840 tx_ring->tx_stats.packets += total_packets;
5841 u64_stats_update_end(&tx_ring->tx_syncp);
5842 q_vector->tx.total_bytes += total_bytes;
5843 q_vector->tx.total_packets += total_packets;
5844
5845 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5846 struct e1000_hw *hw = &adapter->hw;
5847
5848 eop_desc = tx_buffer->next_to_watch;
5849
5850 /* Detect a transmit hang in hardware, this serializes the
5851 * check with the clearing of time_stamp and movement of i */
5852 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5853 if (eop_desc &&
5854 time_after(jiffies, tx_buffer->time_stamp +
5855 (adapter->tx_timeout_factor * HZ)) &&
5856 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5857
5858 /* detected Tx unit hang */
5859 dev_err(tx_ring->dev,
5860 "Detected Tx Unit Hang\n"
5861 " Tx Queue <%d>\n"
5862 " TDH <%x>\n"
5863 " TDT <%x>\n"
5864 " next_to_use <%x>\n"
5865 " next_to_clean <%x>\n"
5866 "buffer_info[next_to_clean]\n"
5867 " time_stamp <%lx>\n"
5868 " next_to_watch <%p>\n"
5869 " jiffies <%lx>\n"
5870 " desc.status <%x>\n",
5871 tx_ring->queue_index,
5872 rd32(E1000_TDH(tx_ring->reg_idx)),
5873 readl(tx_ring->tail),
5874 tx_ring->next_to_use,
5875 tx_ring->next_to_clean,
5876 tx_buffer->time_stamp,
5877 eop_desc,
5878 jiffies,
5879 eop_desc->wb.status);
5880 netif_stop_subqueue(tx_ring->netdev,
5881 tx_ring->queue_index);
5882
5883 /* we are about to reset, no point in enabling stuff */
5884 return true;
5885 }
5886 }
5887
5888 if (unlikely(total_packets &&
5889 netif_carrier_ok(tx_ring->netdev) &&
5890 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5891 /* Make sure that anybody stopping the queue after this
5892 * sees the new next_to_clean.
5893 */
5894 smp_mb();
5895 if (__netif_subqueue_stopped(tx_ring->netdev,
5896 tx_ring->queue_index) &&
5897 !(test_bit(__IGB_DOWN, &adapter->state))) {
5898 netif_wake_subqueue(tx_ring->netdev,
5899 tx_ring->queue_index);
5900
5901 u64_stats_update_begin(&tx_ring->tx_syncp);
5902 tx_ring->tx_stats.restart_queue++;
5903 u64_stats_update_end(&tx_ring->tx_syncp);
5904 }
5905 }
5906
5907 return !!budget;
5908 }
5909
5910 static inline void igb_rx_checksum(struct igb_ring *ring,
5911 union e1000_adv_rx_desc *rx_desc,
5912 struct sk_buff *skb)
5913 {
5914 skb_checksum_none_assert(skb);
5915
5916 /* Ignore Checksum bit is set */
5917 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5918 return;
5919
5920 /* Rx checksum disabled via ethtool */
5921 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5922 return;
5923
5924 /* TCP/UDP checksum error bit is set */
5925 if (igb_test_staterr(rx_desc,
5926 E1000_RXDEXT_STATERR_TCPE |
5927 E1000_RXDEXT_STATERR_IPE)) {
5928 /*
5929 * work around errata with sctp packets where the TCPE aka
5930 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5931 * packets, (aka let the stack check the crc32c)
5932 */
5933 if (!((skb->len == 60) &&
5934 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5935 u64_stats_update_begin(&ring->rx_syncp);
5936 ring->rx_stats.csum_err++;
5937 u64_stats_update_end(&ring->rx_syncp);
5938 }
5939 /* let the stack verify checksum errors */
5940 return;
5941 }
5942 /* It must be a TCP or UDP packet with a valid checksum */
5943 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5944 E1000_RXD_STAT_UDPCS))
5945 skb->ip_summed = CHECKSUM_UNNECESSARY;
5946
5947 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5948 le32_to_cpu(rx_desc->wb.upper.status_error));
5949 }
5950
5951 static inline void igb_rx_hash(struct igb_ring *ring,
5952 union e1000_adv_rx_desc *rx_desc,
5953 struct sk_buff *skb)
5954 {
5955 if (ring->netdev->features & NETIF_F_RXHASH)
5956 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5957 }
5958
5959 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5960 union e1000_adv_rx_desc *rx_desc,
5961 struct sk_buff *skb)
5962 {
5963 struct igb_adapter *adapter = q_vector->adapter;
5964 struct e1000_hw *hw = &adapter->hw;
5965 u64 regval;
5966
5967 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5968 E1000_RXDADV_STAT_TS))
5969 return;
5970
5971 /*
5972 * If this bit is set, then the RX registers contain the time stamp. No
5973 * other packet will be time stamped until we read these registers, so
5974 * read the registers to make them available again. Because only one
5975 * packet can be time stamped at a time, we know that the register
5976 * values must belong to this one here and therefore we don't need to
5977 * compare any of the additional attributes stored for it.
5978 *
5979 * If nothing went wrong, then it should have a shared tx_flags that we
5980 * can turn into a skb_shared_hwtstamps.
5981 */
5982 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5983 u32 *stamp = (u32 *)skb->data;
5984 regval = le32_to_cpu(*(stamp + 2));
5985 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5986 skb_pull(skb, IGB_TS_HDR_LEN);
5987 } else {
5988 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5989 return;
5990
5991 regval = rd32(E1000_RXSTMPL);
5992 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5993 }
5994
5995 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5996 }
5997
5998 static void igb_rx_vlan(struct igb_ring *ring,
5999 union e1000_adv_rx_desc *rx_desc,
6000 struct sk_buff *skb)
6001 {
6002 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6003 u16 vid;
6004 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6005 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6006 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6007 else
6008 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6009
6010 __vlan_hwaccel_put_tag(skb, vid);
6011 }
6012 }
6013
6014 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6015 {
6016 /* HW will not DMA in data larger than the given buffer, even if it
6017 * parses the (NFS, of course) header to be larger. In that case, it
6018 * fills the header buffer and spills the rest into the page.
6019 */
6020 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6021 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6022 if (hlen > IGB_RX_HDR_LEN)
6023 hlen = IGB_RX_HDR_LEN;
6024 return hlen;
6025 }
6026
6027 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6028 {
6029 struct igb_ring *rx_ring = q_vector->rx.ring;
6030 union e1000_adv_rx_desc *rx_desc;
6031 const int current_node = numa_node_id();
6032 unsigned int total_bytes = 0, total_packets = 0;
6033 u16 cleaned_count = igb_desc_unused(rx_ring);
6034 u16 i = rx_ring->next_to_clean;
6035
6036 rx_desc = IGB_RX_DESC(rx_ring, i);
6037
6038 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6039 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6040 struct sk_buff *skb = buffer_info->skb;
6041 union e1000_adv_rx_desc *next_rxd;
6042
6043 buffer_info->skb = NULL;
6044 prefetch(skb->data);
6045
6046 i++;
6047 if (i == rx_ring->count)
6048 i = 0;
6049
6050 next_rxd = IGB_RX_DESC(rx_ring, i);
6051 prefetch(next_rxd);
6052
6053 /*
6054 * This memory barrier is needed to keep us from reading
6055 * any other fields out of the rx_desc until we know the
6056 * RXD_STAT_DD bit is set
6057 */
6058 rmb();
6059
6060 if (!skb_is_nonlinear(skb)) {
6061 __skb_put(skb, igb_get_hlen(rx_desc));
6062 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6063 IGB_RX_HDR_LEN,
6064 DMA_FROM_DEVICE);
6065 buffer_info->dma = 0;
6066 }
6067
6068 if (rx_desc->wb.upper.length) {
6069 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6070
6071 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6072 buffer_info->page,
6073 buffer_info->page_offset,
6074 length);
6075
6076 skb->len += length;
6077 skb->data_len += length;
6078 skb->truesize += PAGE_SIZE / 2;
6079
6080 if ((page_count(buffer_info->page) != 1) ||
6081 (page_to_nid(buffer_info->page) != current_node))
6082 buffer_info->page = NULL;
6083 else
6084 get_page(buffer_info->page);
6085
6086 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6087 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6088 buffer_info->page_dma = 0;
6089 }
6090
6091 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6092 struct igb_rx_buffer *next_buffer;
6093 next_buffer = &rx_ring->rx_buffer_info[i];
6094 buffer_info->skb = next_buffer->skb;
6095 buffer_info->dma = next_buffer->dma;
6096 next_buffer->skb = skb;
6097 next_buffer->dma = 0;
6098 goto next_desc;
6099 }
6100
6101 if (igb_test_staterr(rx_desc,
6102 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6103 dev_kfree_skb_any(skb);
6104 goto next_desc;
6105 }
6106
6107 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6108 igb_rx_hash(rx_ring, rx_desc, skb);
6109 igb_rx_checksum(rx_ring, rx_desc, skb);
6110 igb_rx_vlan(rx_ring, rx_desc, skb);
6111
6112 total_bytes += skb->len;
6113 total_packets++;
6114
6115 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6116
6117 napi_gro_receive(&q_vector->napi, skb);
6118
6119 budget--;
6120 next_desc:
6121 if (!budget)
6122 break;
6123
6124 cleaned_count++;
6125 /* return some buffers to hardware, one at a time is too slow */
6126 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6127 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6128 cleaned_count = 0;
6129 }
6130
6131 /* use prefetched values */
6132 rx_desc = next_rxd;
6133 }
6134
6135 rx_ring->next_to_clean = i;
6136 u64_stats_update_begin(&rx_ring->rx_syncp);
6137 rx_ring->rx_stats.packets += total_packets;
6138 rx_ring->rx_stats.bytes += total_bytes;
6139 u64_stats_update_end(&rx_ring->rx_syncp);
6140 q_vector->rx.total_packets += total_packets;
6141 q_vector->rx.total_bytes += total_bytes;
6142
6143 if (cleaned_count)
6144 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6145
6146 return !!budget;
6147 }
6148
6149 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6150 struct igb_rx_buffer *bi)
6151 {
6152 struct sk_buff *skb = bi->skb;
6153 dma_addr_t dma = bi->dma;
6154
6155 if (dma)
6156 return true;
6157
6158 if (likely(!skb)) {
6159 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6160 IGB_RX_HDR_LEN);
6161 bi->skb = skb;
6162 if (!skb) {
6163 rx_ring->rx_stats.alloc_failed++;
6164 return false;
6165 }
6166
6167 /* initialize skb for ring */
6168 skb_record_rx_queue(skb, rx_ring->queue_index);
6169 }
6170
6171 dma = dma_map_single(rx_ring->dev, skb->data,
6172 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6173
6174 if (dma_mapping_error(rx_ring->dev, dma)) {
6175 rx_ring->rx_stats.alloc_failed++;
6176 return false;
6177 }
6178
6179 bi->dma = dma;
6180 return true;
6181 }
6182
6183 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6184 struct igb_rx_buffer *bi)
6185 {
6186 struct page *page = bi->page;
6187 dma_addr_t page_dma = bi->page_dma;
6188 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6189
6190 if (page_dma)
6191 return true;
6192
6193 if (!page) {
6194 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6195 bi->page = page;
6196 if (unlikely(!page)) {
6197 rx_ring->rx_stats.alloc_failed++;
6198 return false;
6199 }
6200 }
6201
6202 page_dma = dma_map_page(rx_ring->dev, page,
6203 page_offset, PAGE_SIZE / 2,
6204 DMA_FROM_DEVICE);
6205
6206 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6207 rx_ring->rx_stats.alloc_failed++;
6208 return false;
6209 }
6210
6211 bi->page_dma = page_dma;
6212 bi->page_offset = page_offset;
6213 return true;
6214 }
6215
6216 /**
6217 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6218 * @adapter: address of board private structure
6219 **/
6220 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6221 {
6222 union e1000_adv_rx_desc *rx_desc;
6223 struct igb_rx_buffer *bi;
6224 u16 i = rx_ring->next_to_use;
6225
6226 rx_desc = IGB_RX_DESC(rx_ring, i);
6227 bi = &rx_ring->rx_buffer_info[i];
6228 i -= rx_ring->count;
6229
6230 while (cleaned_count--) {
6231 if (!igb_alloc_mapped_skb(rx_ring, bi))
6232 break;
6233
6234 /* Refresh the desc even if buffer_addrs didn't change
6235 * because each write-back erases this info. */
6236 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6237
6238 if (!igb_alloc_mapped_page(rx_ring, bi))
6239 break;
6240
6241 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6242
6243 rx_desc++;
6244 bi++;
6245 i++;
6246 if (unlikely(!i)) {
6247 rx_desc = IGB_RX_DESC(rx_ring, 0);
6248 bi = rx_ring->rx_buffer_info;
6249 i -= rx_ring->count;
6250 }
6251
6252 /* clear the hdr_addr for the next_to_use descriptor */
6253 rx_desc->read.hdr_addr = 0;
6254 }
6255
6256 i += rx_ring->count;
6257
6258 if (rx_ring->next_to_use != i) {
6259 rx_ring->next_to_use = i;
6260
6261 /* Force memory writes to complete before letting h/w
6262 * know there are new descriptors to fetch. (Only
6263 * applicable for weak-ordered memory model archs,
6264 * such as IA-64). */
6265 wmb();
6266 writel(i, rx_ring->tail);
6267 }
6268 }
6269
6270 /**
6271 * igb_mii_ioctl -
6272 * @netdev:
6273 * @ifreq:
6274 * @cmd:
6275 **/
6276 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6277 {
6278 struct igb_adapter *adapter = netdev_priv(netdev);
6279 struct mii_ioctl_data *data = if_mii(ifr);
6280
6281 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6282 return -EOPNOTSUPP;
6283
6284 switch (cmd) {
6285 case SIOCGMIIPHY:
6286 data->phy_id = adapter->hw.phy.addr;
6287 break;
6288 case SIOCGMIIREG:
6289 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6290 &data->val_out))
6291 return -EIO;
6292 break;
6293 case SIOCSMIIREG:
6294 default:
6295 return -EOPNOTSUPP;
6296 }
6297 return 0;
6298 }
6299
6300 /**
6301 * igb_hwtstamp_ioctl - control hardware time stamping
6302 * @netdev:
6303 * @ifreq:
6304 * @cmd:
6305 *
6306 * Outgoing time stamping can be enabled and disabled. Play nice and
6307 * disable it when requested, although it shouldn't case any overhead
6308 * when no packet needs it. At most one packet in the queue may be
6309 * marked for time stamping, otherwise it would be impossible to tell
6310 * for sure to which packet the hardware time stamp belongs.
6311 *
6312 * Incoming time stamping has to be configured via the hardware
6313 * filters. Not all combinations are supported, in particular event
6314 * type has to be specified. Matching the kind of event packet is
6315 * not supported, with the exception of "all V2 events regardless of
6316 * level 2 or 4".
6317 *
6318 **/
6319 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6320 struct ifreq *ifr, int cmd)
6321 {
6322 struct igb_adapter *adapter = netdev_priv(netdev);
6323 struct e1000_hw *hw = &adapter->hw;
6324 struct hwtstamp_config config;
6325 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6326 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6327 u32 tsync_rx_cfg = 0;
6328 bool is_l4 = false;
6329 bool is_l2 = false;
6330 u32 regval;
6331
6332 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6333 return -EFAULT;
6334
6335 /* reserved for future extensions */
6336 if (config.flags)
6337 return -EINVAL;
6338
6339 switch (config.tx_type) {
6340 case HWTSTAMP_TX_OFF:
6341 tsync_tx_ctl = 0;
6342 case HWTSTAMP_TX_ON:
6343 break;
6344 default:
6345 return -ERANGE;
6346 }
6347
6348 switch (config.rx_filter) {
6349 case HWTSTAMP_FILTER_NONE:
6350 tsync_rx_ctl = 0;
6351 break;
6352 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6353 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6354 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6355 case HWTSTAMP_FILTER_ALL:
6356 /*
6357 * register TSYNCRXCFG must be set, therefore it is not
6358 * possible to time stamp both Sync and Delay_Req messages
6359 * => fall back to time stamping all packets
6360 */
6361 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6362 config.rx_filter = HWTSTAMP_FILTER_ALL;
6363 break;
6364 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6365 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6366 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6367 is_l4 = true;
6368 break;
6369 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6370 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6371 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6372 is_l4 = true;
6373 break;
6374 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6375 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6376 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6377 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6378 is_l2 = true;
6379 is_l4 = true;
6380 config.rx_filter = HWTSTAMP_FILTER_SOME;
6381 break;
6382 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6383 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6384 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6385 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6386 is_l2 = true;
6387 is_l4 = true;
6388 config.rx_filter = HWTSTAMP_FILTER_SOME;
6389 break;
6390 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6391 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6392 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6393 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6394 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6395 is_l2 = true;
6396 is_l4 = true;
6397 break;
6398 default:
6399 return -ERANGE;
6400 }
6401
6402 if (hw->mac.type == e1000_82575) {
6403 if (tsync_rx_ctl | tsync_tx_ctl)
6404 return -EINVAL;
6405 return 0;
6406 }
6407
6408 /*
6409 * Per-packet timestamping only works if all packets are
6410 * timestamped, so enable timestamping in all packets as
6411 * long as one rx filter was configured.
6412 */
6413 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6414 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6415 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6416 }
6417
6418 /* enable/disable TX */
6419 regval = rd32(E1000_TSYNCTXCTL);
6420 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6421 regval |= tsync_tx_ctl;
6422 wr32(E1000_TSYNCTXCTL, regval);
6423
6424 /* enable/disable RX */
6425 regval = rd32(E1000_TSYNCRXCTL);
6426 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6427 regval |= tsync_rx_ctl;
6428 wr32(E1000_TSYNCRXCTL, regval);
6429
6430 /* define which PTP packets are time stamped */
6431 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6432
6433 /* define ethertype filter for timestamped packets */
6434 if (is_l2)
6435 wr32(E1000_ETQF(3),
6436 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6437 E1000_ETQF_1588 | /* enable timestamping */
6438 ETH_P_1588)); /* 1588 eth protocol type */
6439 else
6440 wr32(E1000_ETQF(3), 0);
6441
6442 #define PTP_PORT 319
6443 /* L4 Queue Filter[3]: filter by destination port and protocol */
6444 if (is_l4) {
6445 u32 ftqf = (IPPROTO_UDP /* UDP */
6446 | E1000_FTQF_VF_BP /* VF not compared */
6447 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6448 | E1000_FTQF_MASK); /* mask all inputs */
6449 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6450
6451 wr32(E1000_IMIR(3), htons(PTP_PORT));
6452 wr32(E1000_IMIREXT(3),
6453 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6454 if (hw->mac.type == e1000_82576) {
6455 /* enable source port check */
6456 wr32(E1000_SPQF(3), htons(PTP_PORT));
6457 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6458 }
6459 wr32(E1000_FTQF(3), ftqf);
6460 } else {
6461 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6462 }
6463 wrfl();
6464
6465 adapter->hwtstamp_config = config;
6466
6467 /* clear TX/RX time stamp registers, just to be sure */
6468 regval = rd32(E1000_TXSTMPH);
6469 regval = rd32(E1000_RXSTMPH);
6470
6471 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6472 -EFAULT : 0;
6473 }
6474
6475 /**
6476 * igb_ioctl -
6477 * @netdev:
6478 * @ifreq:
6479 * @cmd:
6480 **/
6481 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6482 {
6483 switch (cmd) {
6484 case SIOCGMIIPHY:
6485 case SIOCGMIIREG:
6486 case SIOCSMIIREG:
6487 return igb_mii_ioctl(netdev, ifr, cmd);
6488 case SIOCSHWTSTAMP:
6489 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6490 default:
6491 return -EOPNOTSUPP;
6492 }
6493 }
6494
6495 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6496 {
6497 struct igb_adapter *adapter = hw->back;
6498 u16 cap_offset;
6499
6500 cap_offset = adapter->pdev->pcie_cap;
6501 if (!cap_offset)
6502 return -E1000_ERR_CONFIG;
6503
6504 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6505
6506 return 0;
6507 }
6508
6509 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6510 {
6511 struct igb_adapter *adapter = hw->back;
6512 u16 cap_offset;
6513
6514 cap_offset = adapter->pdev->pcie_cap;
6515 if (!cap_offset)
6516 return -E1000_ERR_CONFIG;
6517
6518 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6519
6520 return 0;
6521 }
6522
6523 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6524 {
6525 struct igb_adapter *adapter = netdev_priv(netdev);
6526 struct e1000_hw *hw = &adapter->hw;
6527 u32 ctrl, rctl;
6528 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6529
6530 if (enable) {
6531 /* enable VLAN tag insert/strip */
6532 ctrl = rd32(E1000_CTRL);
6533 ctrl |= E1000_CTRL_VME;
6534 wr32(E1000_CTRL, ctrl);
6535
6536 /* Disable CFI check */
6537 rctl = rd32(E1000_RCTL);
6538 rctl &= ~E1000_RCTL_CFIEN;
6539 wr32(E1000_RCTL, rctl);
6540 } else {
6541 /* disable VLAN tag insert/strip */
6542 ctrl = rd32(E1000_CTRL);
6543 ctrl &= ~E1000_CTRL_VME;
6544 wr32(E1000_CTRL, ctrl);
6545 }
6546
6547 igb_rlpml_set(adapter);
6548 }
6549
6550 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6551 {
6552 struct igb_adapter *adapter = netdev_priv(netdev);
6553 struct e1000_hw *hw = &adapter->hw;
6554 int pf_id = adapter->vfs_allocated_count;
6555
6556 /* attempt to add filter to vlvf array */
6557 igb_vlvf_set(adapter, vid, true, pf_id);
6558
6559 /* add the filter since PF can receive vlans w/o entry in vlvf */
6560 igb_vfta_set(hw, vid, true);
6561
6562 set_bit(vid, adapter->active_vlans);
6563
6564 return 0;
6565 }
6566
6567 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6568 {
6569 struct igb_adapter *adapter = netdev_priv(netdev);
6570 struct e1000_hw *hw = &adapter->hw;
6571 int pf_id = adapter->vfs_allocated_count;
6572 s32 err;
6573
6574 /* remove vlan from VLVF table array */
6575 err = igb_vlvf_set(adapter, vid, false, pf_id);
6576
6577 /* if vid was not present in VLVF just remove it from table */
6578 if (err)
6579 igb_vfta_set(hw, vid, false);
6580
6581 clear_bit(vid, adapter->active_vlans);
6582
6583 return 0;
6584 }
6585
6586 static void igb_restore_vlan(struct igb_adapter *adapter)
6587 {
6588 u16 vid;
6589
6590 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6591
6592 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6593 igb_vlan_rx_add_vid(adapter->netdev, vid);
6594 }
6595
6596 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6597 {
6598 struct pci_dev *pdev = adapter->pdev;
6599 struct e1000_mac_info *mac = &adapter->hw.mac;
6600
6601 mac->autoneg = 0;
6602
6603 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6604 * for the switch() below to work */
6605 if ((spd & 1) || (dplx & ~1))
6606 goto err_inval;
6607
6608 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6609 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6610 spd != SPEED_1000 &&
6611 dplx != DUPLEX_FULL)
6612 goto err_inval;
6613
6614 switch (spd + dplx) {
6615 case SPEED_10 + DUPLEX_HALF:
6616 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6617 break;
6618 case SPEED_10 + DUPLEX_FULL:
6619 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6620 break;
6621 case SPEED_100 + DUPLEX_HALF:
6622 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6623 break;
6624 case SPEED_100 + DUPLEX_FULL:
6625 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6626 break;
6627 case SPEED_1000 + DUPLEX_FULL:
6628 mac->autoneg = 1;
6629 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6630 break;
6631 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6632 default:
6633 goto err_inval;
6634 }
6635 return 0;
6636
6637 err_inval:
6638 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6639 return -EINVAL;
6640 }
6641
6642 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6643 bool runtime)
6644 {
6645 struct net_device *netdev = pci_get_drvdata(pdev);
6646 struct igb_adapter *adapter = netdev_priv(netdev);
6647 struct e1000_hw *hw = &adapter->hw;
6648 u32 ctrl, rctl, status;
6649 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6650 #ifdef CONFIG_PM
6651 int retval = 0;
6652 #endif
6653
6654 netif_device_detach(netdev);
6655
6656 if (netif_running(netdev))
6657 __igb_close(netdev, true);
6658
6659 igb_clear_interrupt_scheme(adapter);
6660
6661 #ifdef CONFIG_PM
6662 retval = pci_save_state(pdev);
6663 if (retval)
6664 return retval;
6665 #endif
6666
6667 status = rd32(E1000_STATUS);
6668 if (status & E1000_STATUS_LU)
6669 wufc &= ~E1000_WUFC_LNKC;
6670
6671 if (wufc) {
6672 igb_setup_rctl(adapter);
6673 igb_set_rx_mode(netdev);
6674
6675 /* turn on all-multi mode if wake on multicast is enabled */
6676 if (wufc & E1000_WUFC_MC) {
6677 rctl = rd32(E1000_RCTL);
6678 rctl |= E1000_RCTL_MPE;
6679 wr32(E1000_RCTL, rctl);
6680 }
6681
6682 ctrl = rd32(E1000_CTRL);
6683 /* advertise wake from D3Cold */
6684 #define E1000_CTRL_ADVD3WUC 0x00100000
6685 /* phy power management enable */
6686 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6687 ctrl |= E1000_CTRL_ADVD3WUC;
6688 wr32(E1000_CTRL, ctrl);
6689
6690 /* Allow time for pending master requests to run */
6691 igb_disable_pcie_master(hw);
6692
6693 wr32(E1000_WUC, E1000_WUC_PME_EN);
6694 wr32(E1000_WUFC, wufc);
6695 } else {
6696 wr32(E1000_WUC, 0);
6697 wr32(E1000_WUFC, 0);
6698 }
6699
6700 *enable_wake = wufc || adapter->en_mng_pt;
6701 if (!*enable_wake)
6702 igb_power_down_link(adapter);
6703 else
6704 igb_power_up_link(adapter);
6705
6706 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6707 * would have already happened in close and is redundant. */
6708 igb_release_hw_control(adapter);
6709
6710 pci_disable_device(pdev);
6711
6712 return 0;
6713 }
6714
6715 #ifdef CONFIG_PM
6716 #ifdef CONFIG_PM_SLEEP
6717 static int igb_suspend(struct device *dev)
6718 {
6719 int retval;
6720 bool wake;
6721 struct pci_dev *pdev = to_pci_dev(dev);
6722
6723 retval = __igb_shutdown(pdev, &wake, 0);
6724 if (retval)
6725 return retval;
6726
6727 if (wake) {
6728 pci_prepare_to_sleep(pdev);
6729 } else {
6730 pci_wake_from_d3(pdev, false);
6731 pci_set_power_state(pdev, PCI_D3hot);
6732 }
6733
6734 return 0;
6735 }
6736 #endif /* CONFIG_PM_SLEEP */
6737
6738 static int igb_resume(struct device *dev)
6739 {
6740 struct pci_dev *pdev = to_pci_dev(dev);
6741 struct net_device *netdev = pci_get_drvdata(pdev);
6742 struct igb_adapter *adapter = netdev_priv(netdev);
6743 struct e1000_hw *hw = &adapter->hw;
6744 u32 err;
6745
6746 pci_set_power_state(pdev, PCI_D0);
6747 pci_restore_state(pdev);
6748 pci_save_state(pdev);
6749
6750 err = pci_enable_device_mem(pdev);
6751 if (err) {
6752 dev_err(&pdev->dev,
6753 "igb: Cannot enable PCI device from suspend\n");
6754 return err;
6755 }
6756 pci_set_master(pdev);
6757
6758 pci_enable_wake(pdev, PCI_D3hot, 0);
6759 pci_enable_wake(pdev, PCI_D3cold, 0);
6760
6761 if (!rtnl_is_locked()) {
6762 /*
6763 * shut up ASSERT_RTNL() warning in
6764 * netif_set_real_num_tx/rx_queues.
6765 */
6766 rtnl_lock();
6767 err = igb_init_interrupt_scheme(adapter);
6768 rtnl_unlock();
6769 } else {
6770 err = igb_init_interrupt_scheme(adapter);
6771 }
6772 if (err) {
6773 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6774 return -ENOMEM;
6775 }
6776
6777 igb_reset(adapter);
6778
6779 /* let the f/w know that the h/w is now under the control of the
6780 * driver. */
6781 igb_get_hw_control(adapter);
6782
6783 wr32(E1000_WUS, ~0);
6784
6785 if (netdev->flags & IFF_UP) {
6786 err = __igb_open(netdev, true);
6787 if (err)
6788 return err;
6789 }
6790
6791 netif_device_attach(netdev);
6792 return 0;
6793 }
6794
6795 #ifdef CONFIG_PM_RUNTIME
6796 static int igb_runtime_idle(struct device *dev)
6797 {
6798 struct pci_dev *pdev = to_pci_dev(dev);
6799 struct net_device *netdev = pci_get_drvdata(pdev);
6800 struct igb_adapter *adapter = netdev_priv(netdev);
6801
6802 if (!igb_has_link(adapter))
6803 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6804
6805 return -EBUSY;
6806 }
6807
6808 static int igb_runtime_suspend(struct device *dev)
6809 {
6810 struct pci_dev *pdev = to_pci_dev(dev);
6811 int retval;
6812 bool wake;
6813
6814 retval = __igb_shutdown(pdev, &wake, 1);
6815 if (retval)
6816 return retval;
6817
6818 if (wake) {
6819 pci_prepare_to_sleep(pdev);
6820 } else {
6821 pci_wake_from_d3(pdev, false);
6822 pci_set_power_state(pdev, PCI_D3hot);
6823 }
6824
6825 return 0;
6826 }
6827
/**
 * igb_runtime_resume - runtime PM resume callback
 * @dev: generic device embedded in the PCI device
 *
 * Runtime resume is the same operation as a system resume, so this just
 * forwards to igb_resume() and returns its result.
 **/
static int igb_runtime_resume(struct device *dev)
{
	int ret = igb_resume(dev);

	return ret;
}
6832 #endif /* CONFIG_PM_RUNTIME */
6833 #endif
6834
6835 static void igb_shutdown(struct pci_dev *pdev)
6836 {
6837 bool wake;
6838
6839 __igb_shutdown(pdev, &wake, 0);
6840
6841 if (system_state == SYSTEM_POWER_OFF) {
6842 pci_wake_from_d3(pdev, wake);
6843 pci_set_power_state(pdev, PCI_D3hot);
6844 }
6845 }
6846
6847 #ifdef CONFIG_NET_POLL_CONTROLLER
6848 /*
6849 * Polling 'interrupt' - used by things like netconsole to send skbs
6850 * without having to re-enable interrupts. It's not called while
6851 * the interrupt routine is executing.
6852 */
6853 static void igb_netpoll(struct net_device *netdev)
6854 {
6855 struct igb_adapter *adapter = netdev_priv(netdev);
6856 struct e1000_hw *hw = &adapter->hw;
6857 struct igb_q_vector *q_vector;
6858 int i;
6859
6860 for (i = 0; i < adapter->num_q_vectors; i++) {
6861 q_vector = adapter->q_vector[i];
6862 if (adapter->msix_entries)
6863 wr32(E1000_EIMC, q_vector->eims_value);
6864 else
6865 igb_irq_disable(adapter);
6866 napi_schedule(&q_vector->napi);
6867 }
6868 }
6869 #endif /* CONFIG_NET_POLL_CONTROLLER */
6870
6871 /**
6872 * igb_io_error_detected - called when PCI error is detected
6873 * @pdev: Pointer to PCI device
6874 * @state: The current pci connection state
6875 *
6876 * This function is called after a PCI bus error affecting
6877 * this device has been detected.
6878 */
6879 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6880 pci_channel_state_t state)
6881 {
6882 struct net_device *netdev = pci_get_drvdata(pdev);
6883 struct igb_adapter *adapter = netdev_priv(netdev);
6884
6885 netif_device_detach(netdev);
6886
6887 if (state == pci_channel_io_perm_failure)
6888 return PCI_ERS_RESULT_DISCONNECT;
6889
6890 if (netif_running(netdev))
6891 igb_down(adapter);
6892 pci_disable_device(pdev);
6893
6894 /* Request a slot slot reset. */
6895 return PCI_ERS_RESULT_NEED_RESET;
6896 }
6897
6898 /**
6899 * igb_io_slot_reset - called after the pci bus has been reset.
6900 * @pdev: Pointer to PCI device
6901 *
6902 * Restart the card from scratch, as if from a cold-boot. Implementation
6903 * resembles the first-half of the igb_resume routine.
6904 */
6905 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6906 {
6907 struct net_device *netdev = pci_get_drvdata(pdev);
6908 struct igb_adapter *adapter = netdev_priv(netdev);
6909 struct e1000_hw *hw = &adapter->hw;
6910 pci_ers_result_t result;
6911 int err;
6912
6913 if (pci_enable_device_mem(pdev)) {
6914 dev_err(&pdev->dev,
6915 "Cannot re-enable PCI device after reset.\n");
6916 result = PCI_ERS_RESULT_DISCONNECT;
6917 } else {
6918 pci_set_master(pdev);
6919 pci_restore_state(pdev);
6920 pci_save_state(pdev);
6921
6922 pci_enable_wake(pdev, PCI_D3hot, 0);
6923 pci_enable_wake(pdev, PCI_D3cold, 0);
6924
6925 igb_reset(adapter);
6926 wr32(E1000_WUS, ~0);
6927 result = PCI_ERS_RESULT_RECOVERED;
6928 }
6929
6930 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6931 if (err) {
6932 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6933 "failed 0x%0x\n", err);
6934 /* non-fatal, continue */
6935 }
6936
6937 return result;
6938 }
6939
6940 /**
6941 * igb_io_resume - called when traffic can start flowing again.
6942 * @pdev: Pointer to PCI device
6943 *
6944 * This callback is called when the error recovery driver tells us that
6945 * its OK to resume normal operation. Implementation resembles the
6946 * second-half of the igb_resume routine.
6947 */
6948 static void igb_io_resume(struct pci_dev *pdev)
6949 {
6950 struct net_device *netdev = pci_get_drvdata(pdev);
6951 struct igb_adapter *adapter = netdev_priv(netdev);
6952
6953 if (netif_running(netdev)) {
6954 if (igb_up(adapter)) {
6955 dev_err(&pdev->dev, "igb_up failed after reset\n");
6956 return;
6957 }
6958 }
6959
6960 netif_device_attach(netdev);
6961
6962 /* let the f/w know that the h/w is now under the control of the
6963 * driver. */
6964 igb_get_hw_control(adapter);
6965 }
6966
6967 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6968 u8 qsel)
6969 {
6970 u32 rar_low, rar_high;
6971 struct e1000_hw *hw = &adapter->hw;
6972
6973 /* HW expects these in little endian so we reverse the byte order
6974 * from network order (big endian) to little endian
6975 */
6976 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6977 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6978 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6979
6980 /* Indicate to hardware the Address is Valid. */
6981 rar_high |= E1000_RAH_AV;
6982
6983 if (hw->mac.type == e1000_82575)
6984 rar_high |= E1000_RAH_POOL_1 * qsel;
6985 else
6986 rar_high |= E1000_RAH_POOL_1 << qsel;
6987
6988 wr32(E1000_RAL(index), rar_low);
6989 wrfl();
6990 wr32(E1000_RAH(index), rar_high);
6991 wrfl();
6992 }
6993
6994 static int igb_set_vf_mac(struct igb_adapter *adapter,
6995 int vf, unsigned char *mac_addr)
6996 {
6997 struct e1000_hw *hw = &adapter->hw;
6998 /* VF MAC addresses start at end of receive addresses and moves
6999 * torwards the first, as a result a collision should not be possible */
7000 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7001
7002 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7003
7004 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7005
7006 return 0;
7007 }
7008
7009 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7010 {
7011 struct igb_adapter *adapter = netdev_priv(netdev);
7012 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7013 return -EINVAL;
7014 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7015 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7016 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7017 " change effective.");
7018 if (test_bit(__IGB_DOWN, &adapter->state)) {
7019 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7020 " but the PF device is not up.\n");
7021 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7022 " attempting to use the VF device.\n");
7023 }
7024 return igb_set_vf_mac(adapter, vf, mac);
7025 }
7026
7027 static int igb_link_mbps(int internal_link_speed)
7028 {
7029 switch (internal_link_speed) {
7030 case SPEED_100:
7031 return 100;
7032 case SPEED_1000:
7033 return 1000;
7034 default:
7035 return 0;
7036 }
7037 }
7038
7039 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7040 int link_speed)
7041 {
7042 int rf_dec, rf_int;
7043 u32 bcnrc_val;
7044
7045 if (tx_rate != 0) {
7046 /* Calculate the rate factor values to set */
7047 rf_int = link_speed / tx_rate;
7048 rf_dec = (link_speed - (rf_int * tx_rate));
7049 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7050
7051 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7052 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7053 E1000_RTTBCNRC_RF_INT_MASK);
7054 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7055 } else {
7056 bcnrc_val = 0;
7057 }
7058
7059 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7060 wr32(E1000_RTTBCNRC, bcnrc_val);
7061 }
7062
7063 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7064 {
7065 int actual_link_speed, i;
7066 bool reset_rate = false;
7067
7068 /* VF TX rate limit was not set or not supported */
7069 if ((adapter->vf_rate_link_speed == 0) ||
7070 (adapter->hw.mac.type != e1000_82576))
7071 return;
7072
7073 actual_link_speed = igb_link_mbps(adapter->link_speed);
7074 if (actual_link_speed != adapter->vf_rate_link_speed) {
7075 reset_rate = true;
7076 adapter->vf_rate_link_speed = 0;
7077 dev_info(&adapter->pdev->dev,
7078 "Link speed has been changed. VF Transmit "
7079 "rate is disabled\n");
7080 }
7081
7082 for (i = 0; i < adapter->vfs_allocated_count; i++) {
7083 if (reset_rate)
7084 adapter->vf_data[i].tx_rate = 0;
7085
7086 igb_set_vf_rate_limit(&adapter->hw, i,
7087 adapter->vf_data[i].tx_rate,
7088 actual_link_speed);
7089 }
7090 }
7091
7092 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7093 {
7094 struct igb_adapter *adapter = netdev_priv(netdev);
7095 struct e1000_hw *hw = &adapter->hw;
7096 int actual_link_speed;
7097
7098 if (hw->mac.type != e1000_82576)
7099 return -EOPNOTSUPP;
7100
7101 actual_link_speed = igb_link_mbps(adapter->link_speed);
7102 if ((vf >= adapter->vfs_allocated_count) ||
7103 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7104 (tx_rate < 0) || (tx_rate > actual_link_speed))
7105 return -EINVAL;
7106
7107 adapter->vf_rate_link_speed = actual_link_speed;
7108 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7109 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7110
7111 return 0;
7112 }
7113
7114 static int igb_ndo_get_vf_config(struct net_device *netdev,
7115 int vf, struct ifla_vf_info *ivi)
7116 {
7117 struct igb_adapter *adapter = netdev_priv(netdev);
7118 if (vf >= adapter->vfs_allocated_count)
7119 return -EINVAL;
7120 ivi->vf = vf;
7121 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7122 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7123 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7124 ivi->qos = adapter->vf_data[vf].pf_qos;
7125 return 0;
7126 }
7127
7128 static void igb_vmm_control(struct igb_adapter *adapter)
7129 {
7130 struct e1000_hw *hw = &adapter->hw;
7131 u32 reg;
7132
7133 switch (hw->mac.type) {
7134 case e1000_82575:
7135 default:
7136 /* replication is not supported for 82575 */
7137 return;
7138 case e1000_82576:
7139 /* notify HW that the MAC is adding vlan tags */
7140 reg = rd32(E1000_DTXCTL);
7141 reg |= E1000_DTXCTL_VLAN_ADDED;
7142 wr32(E1000_DTXCTL, reg);
7143 case e1000_82580:
7144 /* enable replication vlan tag stripping */
7145 reg = rd32(E1000_RPLOLR);
7146 reg |= E1000_RPLOLR_STRVLAN;
7147 wr32(E1000_RPLOLR, reg);
7148 case e1000_i350:
7149 /* none of the above registers are supported by i350 */
7150 break;
7151 }
7152
7153 if (adapter->vfs_allocated_count) {
7154 igb_vmdq_set_loopback_pf(hw, true);
7155 igb_vmdq_set_replication_pf(hw, true);
7156 igb_vmdq_set_anti_spoofing_pf(hw, true,
7157 adapter->vfs_allocated_count);
7158 } else {
7159 igb_vmdq_set_loopback_pf(hw, false);
7160 igb_vmdq_set_replication_pf(hw, false);
7161 }
7162 }
7163
7164 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7165 {
7166 struct e1000_hw *hw = &adapter->hw;
7167 u32 dmac_thr;
7168 u16 hwm;
7169
7170 if (hw->mac.type > e1000_82580) {
7171 if (adapter->flags & IGB_FLAG_DMAC) {
7172 u32 reg;
7173
7174 /* force threshold to 0. */
7175 wr32(E1000_DMCTXTH, 0);
7176
7177 /*
7178 * DMA Coalescing high water mark needs to be greater
7179 * than the Rx threshold. Set hwm to PBA - max frame
7180 * size in 16B units, capping it at PBA - 6KB.
7181 */
7182 hwm = 64 * pba - adapter->max_frame_size / 16;
7183 if (hwm < 64 * (pba - 6))
7184 hwm = 64 * (pba - 6);
7185 reg = rd32(E1000_FCRTC);
7186 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7187 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7188 & E1000_FCRTC_RTH_COAL_MASK);
7189 wr32(E1000_FCRTC, reg);
7190
7191 /*
7192 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7193 * frame size, capping it at PBA - 10KB.
7194 */
7195 dmac_thr = pba - adapter->max_frame_size / 512;
7196 if (dmac_thr < pba - 10)
7197 dmac_thr = pba - 10;
7198 reg = rd32(E1000_DMACR);
7199 reg &= ~E1000_DMACR_DMACTHR_MASK;
7200 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7201 & E1000_DMACR_DMACTHR_MASK);
7202
7203 /* transition to L0x or L1 if available..*/
7204 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7205
7206 /* watchdog timer= +-1000 usec in 32usec intervals */
7207 reg |= (1000 >> 5);
7208 wr32(E1000_DMACR, reg);
7209
7210 /*
7211 * no lower threshold to disable
7212 * coalescing(smart fifb)-UTRESH=0
7213 */
7214 wr32(E1000_DMCRTRH, 0);
7215
7216 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7217
7218 wr32(E1000_DMCTLX, reg);
7219
7220 /*
7221 * free space in tx packet buffer to wake from
7222 * DMA coal
7223 */
7224 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7225 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7226
7227 /*
7228 * make low power state decision controlled
7229 * by DMA coal
7230 */
7231 reg = rd32(E1000_PCIEMISC);
7232 reg &= ~E1000_PCIEMISC_LX_DECISION;
7233 wr32(E1000_PCIEMISC, reg);
7234 } /* endif adapter->dmac is not disabled */
7235 } else if (hw->mac.type == e1000_82580) {
7236 u32 reg = rd32(E1000_PCIEMISC);
7237 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7238 wr32(E1000_DMACR, 0);
7239 }
7240 }
7241
7242 /* igb_main.c */