]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - drivers/net/igb/igb_main.c
Fix common misspellings
[mirror_ubuntu-bionic-kernel.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
/*
 * Driver version components.  DRV_VERSION stringifies them into
 * "MAJ.MIN.BUILD-kKFIX" and is also passed to MODULE_VERSION() below.
 */
53 #define MAJ 3
54 #define MIN 0
55 #define BUILD 6
56 #define KFIX 2
57 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
58 __stringify(BUILD) "-k" __stringify(KFIX)
/*
 * igb_driver_name is used as the pci_driver name; it and
 * igb_driver_version are non-static (presumably referenced from other
 * igb source files - TODO confirm).
 */
59 char igb_driver_name[] = "igb";
60 char igb_driver_version[] = DRV_VERSION;
/* Banner strings printed once by igb_init_module(). */
61 static const char igb_driver_string[] =
62 "Intel(R) Gigabit Ethernet Network Driver";
63 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
64
/*
 * Board-type index -> hardware description.  Only board_82575 is
 * populated here.
 */
65 static const struct e1000_info *igb_info_tbl[] = {
66 [board_82575] = &e1000_82575_info,
67 };
68
/*
 * PCI IDs handled by this driver.  The table is used as igb_driver.id_table
 * and exported with MODULE_DEVICE_TABLE().  Every entry carries
 * board_82575 as its trailing value (presumably the index later used to
 * look up igb_info_tbl - TODO confirm against igb_probe).
 */
69 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
95 /* required last entry */
96 {0, }
97 };
98
99 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
100
101 void igb_reset(struct igb_adapter *);
102 static int igb_setup_all_tx_resources(struct igb_adapter *);
103 static int igb_setup_all_rx_resources(struct igb_adapter *);
104 static void igb_free_all_tx_resources(struct igb_adapter *);
105 static void igb_free_all_rx_resources(struct igb_adapter *);
106 static void igb_setup_mrqc(struct igb_adapter *);
107 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
108 static void __devexit igb_remove(struct pci_dev *pdev);
109 static void igb_init_hw_timer(struct igb_adapter *adapter);
110 static int igb_sw_init(struct igb_adapter *);
111 static int igb_open(struct net_device *);
112 static int igb_close(struct net_device *);
113 static void igb_configure_tx(struct igb_adapter *);
114 static void igb_configure_rx(struct igb_adapter *);
115 static void igb_clean_all_tx_rings(struct igb_adapter *);
116 static void igb_clean_all_rx_rings(struct igb_adapter *);
117 static void igb_clean_tx_ring(struct igb_ring *);
118 static void igb_clean_rx_ring(struct igb_ring *);
119 static void igb_set_rx_mode(struct net_device *);
120 static void igb_update_phy_info(unsigned long);
121 static void igb_watchdog(unsigned long);
122 static void igb_watchdog_task(struct work_struct *);
123 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
124 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
125 struct rtnl_link_stats64 *stats);
126 static int igb_change_mtu(struct net_device *, int);
127 static int igb_set_mac(struct net_device *, void *);
128 static void igb_set_uta(struct igb_adapter *adapter);
129 static irqreturn_t igb_intr(int irq, void *);
130 static irqreturn_t igb_intr_msi(int irq, void *);
131 static irqreturn_t igb_msix_other(int irq, void *);
132 static irqreturn_t igb_msix_ring(int irq, void *);
133 #ifdef CONFIG_IGB_DCA
134 static void igb_update_dca(struct igb_q_vector *);
135 static void igb_setup_dca(struct igb_adapter *);
136 #endif /* CONFIG_IGB_DCA */
137 static bool igb_clean_tx_irq(struct igb_q_vector *);
138 static int igb_poll(struct napi_struct *, int);
139 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
140 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
141 static void igb_tx_timeout(struct net_device *);
142 static void igb_reset_task(struct work_struct *);
143 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
144 static void igb_vlan_rx_add_vid(struct net_device *, u16);
145 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
146 static void igb_restore_vlan(struct igb_adapter *);
147 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
148 static void igb_ping_all_vfs(struct igb_adapter *);
149 static void igb_msg_task(struct igb_adapter *);
150 static void igb_vmm_control(struct igb_adapter *);
151 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
152 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
153 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
154 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
155 int vf, u16 vlan, u8 qos);
156 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
157 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
158 struct ifla_vf_info *ivi);
159 static void igb_check_vf_rate_limit(struct igb_adapter *);
160
161 #ifdef CONFIG_PM
162 static int igb_suspend(struct pci_dev *, pm_message_t);
163 static int igb_resume(struct pci_dev *);
164 #endif
165 static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *data);

/*
 * Notifier block handed to dca_register_notify()/dca_unregister_notify()
 * at module load/unload.  Members not listed (next, priority) are
 * zero-initialised because the object has static storage duration.
 */
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
};
#endif
174 #ifdef CONFIG_NET_POLL_CONTROLLER
175 /* for netdump / net console */
176 static void igb_netpoll(struct net_device *);
177 #endif
#ifdef CONFIG_PCI_IOV
/* Module parameter; a static with no initialiser starts out as 0. */
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs,
		 "Maximum number of virtual functions to allocate "
		 "per physical function");
#endif /* CONFIG_PCI_IOV */
184
185 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
186 pci_channel_state_t);
187 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
188 static void igb_io_resume(struct pci_dev *);
189
/*
 * PCI error-recovery callbacks; installed through igb_driver.err_handler.
 */
190 static struct pci_error_handlers igb_err_handler = {
191 .error_detected = igb_io_error_detected,
192 .slot_reset = igb_io_slot_reset,
193 .resume = igb_io_resume,
194 };
195
196
/*
 * PCI driver description registered by igb_init_module() and
 * unregistered by igb_exit_module().  The suspend/resume hooks are only
 * wired up when CONFIG_PM is set.
 */
197 static struct pci_driver igb_driver = {
198 .name = igb_driver_name,
199 .id_table = igb_pci_tbl,
200 .probe = igb_probe,
201 .remove = __devexit_p(igb_remove),
202 #ifdef CONFIG_PM
203 /* Power Management Hooks */
204 .suspend = igb_suspend,
205 .resume = igb_resume,
206 #endif
207 .shutdown = igb_shutdown,
208 .err_handler = &igb_err_handler
209 };
210
211 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
212 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
213 MODULE_LICENSE("GPL");
214 MODULE_VERSION(DRV_VERSION);
215
216 struct igb_reg_info {
217 u32 ofs;
218 char *name;
219 };
220
/*
 * Registers printed by igb_dump().  Entries written with an index of 0
 * (e.g. E1000_RDLEN(0)) are treated by igb_regdump() as per-queue
 * registers and dumped for queues 0-3.  The empty entry ends the table:
 * igb_dump() stops at the first entry whose name is NULL.
 */
221 static const struct igb_reg_info igb_reg_info_tbl[] = {
222
223 /* General Registers */
224 {E1000_CTRL, "CTRL"},
225 {E1000_STATUS, "STATUS"},
226 {E1000_CTRL_EXT, "CTRL_EXT"},
227
228 /* Interrupt Registers */
229 {E1000_ICR, "ICR"},
230
231 /* RX Registers */
232 {E1000_RCTL, "RCTL"},
233 {E1000_RDLEN(0), "RDLEN"},
234 {E1000_RDH(0), "RDH"},
235 {E1000_RDT(0), "RDT"},
236 {E1000_RXDCTL(0), "RXDCTL"},
237 {E1000_RDBAL(0), "RDBAL"},
238 {E1000_RDBAH(0), "RDBAH"},
239
240 /* TX Registers */
241 {E1000_TCTL, "TCTL"},
242 {E1000_TDBAL(0), "TDBAL"},
243 {E1000_TDBAH(0), "TDBAH"},
244 {E1000_TDLEN(0), "TDLEN"},
245 {E1000_TDH(0), "TDH"},
246 {E1000_TDT(0), "TDT"},
247 {E1000_TXDCTL(0), "TXDCTL"},
248 {E1000_TDFH, "TDFH"},
249 {E1000_TDFT, "TDFT"},
250 {E1000_TDFHS, "TDFHS"},
251 {E1000_TDFPC, "TDFPC"},
252
253 /* List Terminator */
254 {}
255 };
256
257 /*
258 * igb_regdump - register printout routine
259 */
260 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
261 {
262 int n = 0;
263 char rname[16];
264 u32 regs[8];
265
266 switch (reginfo->ofs) {
267 case E1000_RDLEN(0):
268 for (n = 0; n < 4; n++)
269 regs[n] = rd32(E1000_RDLEN(n));
270 break;
271 case E1000_RDH(0):
272 for (n = 0; n < 4; n++)
273 regs[n] = rd32(E1000_RDH(n));
274 break;
275 case E1000_RDT(0):
276 for (n = 0; n < 4; n++)
277 regs[n] = rd32(E1000_RDT(n));
278 break;
279 case E1000_RXDCTL(0):
280 for (n = 0; n < 4; n++)
281 regs[n] = rd32(E1000_RXDCTL(n));
282 break;
283 case E1000_RDBAL(0):
284 for (n = 0; n < 4; n++)
285 regs[n] = rd32(E1000_RDBAL(n));
286 break;
287 case E1000_RDBAH(0):
288 for (n = 0; n < 4; n++)
289 regs[n] = rd32(E1000_RDBAH(n));
290 break;
291 case E1000_TDBAL(0):
292 for (n = 0; n < 4; n++)
293 regs[n] = rd32(E1000_RDBAL(n));
294 break;
295 case E1000_TDBAH(0):
296 for (n = 0; n < 4; n++)
297 regs[n] = rd32(E1000_TDBAH(n));
298 break;
299 case E1000_TDLEN(0):
300 for (n = 0; n < 4; n++)
301 regs[n] = rd32(E1000_TDLEN(n));
302 break;
303 case E1000_TDH(0):
304 for (n = 0; n < 4; n++)
305 regs[n] = rd32(E1000_TDH(n));
306 break;
307 case E1000_TDT(0):
308 for (n = 0; n < 4; n++)
309 regs[n] = rd32(E1000_TDT(n));
310 break;
311 case E1000_TXDCTL(0):
312 for (n = 0; n < 4; n++)
313 regs[n] = rd32(E1000_TXDCTL(n));
314 break;
315 default:
316 printk(KERN_INFO "%-15s %08x\n",
317 reginfo->name, rd32(reginfo->ofs));
318 return;
319 }
320
321 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
322 printk(KERN_INFO "%-15s ", rname);
323 for (n = 0; n < 4; n++)
324 printk(KERN_CONT "%08x ", regs[n]);
325 printk(KERN_CONT "\n");
326 }
327
328 /*
329 * igb_dump - Print registers, tx-rings and rx-rings
330 */
331 static void igb_dump(struct igb_adapter *adapter)
332 {
333 struct net_device *netdev = adapter->netdev;
334 struct e1000_hw *hw = &adapter->hw;
335 struct igb_reg_info *reginfo;
336 int n = 0;
337 struct igb_ring *tx_ring;
338 union e1000_adv_tx_desc *tx_desc;
339 struct my_u0 { u64 a; u64 b; } *u0;
340 struct igb_buffer *buffer_info;
341 struct igb_ring *rx_ring;
342 union e1000_adv_rx_desc *rx_desc;
343 u32 staterr;
344 int i = 0;
345
346 if (!netif_msg_hw(adapter))
347 return;
348
349 /* Print netdevice Info */
350 if (netdev) {
351 dev_info(&adapter->pdev->dev, "Net device Info\n");
352 printk(KERN_INFO "Device Name state "
353 "trans_start last_rx\n");
354 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
355 netdev->name,
356 netdev->state,
357 netdev->trans_start,
358 netdev->last_rx);
359 }
360
361 /* Print Registers */
362 dev_info(&adapter->pdev->dev, "Register Dump\n");
363 printk(KERN_INFO " Register Name Value\n");
364 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
365 reginfo->name; reginfo++) {
366 igb_regdump(hw, reginfo);
367 }
368
369 /* Print TX Ring Summary */
370 if (!netdev || !netif_running(netdev))
371 goto exit;
372
373 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
374 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
375 " leng ntw timestamp\n");
376 for (n = 0; n < adapter->num_tx_queues; n++) {
377 tx_ring = adapter->tx_ring[n];
378 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
379 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
380 n, tx_ring->next_to_use, tx_ring->next_to_clean,
381 (u64)buffer_info->dma,
382 buffer_info->length,
383 buffer_info->next_to_watch,
384 (u64)buffer_info->time_stamp);
385 }
386
387 /* Print TX Rings */
388 if (!netif_msg_tx_done(adapter))
389 goto rx_ring_summary;
390
391 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
392
393 /* Transmit Descriptor Formats
394 *
395 * Advanced Transmit Descriptor
396 * +--------------------------------------------------------------+
397 * 0 | Buffer Address [63:0] |
398 * +--------------------------------------------------------------+
399 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
400 * +--------------------------------------------------------------+
401 * 63 46 45 40 39 38 36 35 32 31 24 15 0
402 */
403
404 for (n = 0; n < adapter->num_tx_queues; n++) {
405 tx_ring = adapter->tx_ring[n];
406 printk(KERN_INFO "------------------------------------\n");
407 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
408 printk(KERN_INFO "------------------------------------\n");
409 printk(KERN_INFO "T [desc] [address 63:0 ] "
410 "[PlPOCIStDDM Ln] [bi->dma ] "
411 "leng ntw timestamp bi->skb\n");
412
413 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
414 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
415 buffer_info = &tx_ring->buffer_info[i];
416 u0 = (struct my_u0 *)tx_desc;
417 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
418 " %04X %3X %016llX %p", i,
419 le64_to_cpu(u0->a),
420 le64_to_cpu(u0->b),
421 (u64)buffer_info->dma,
422 buffer_info->length,
423 buffer_info->next_to_watch,
424 (u64)buffer_info->time_stamp,
425 buffer_info->skb);
426 if (i == tx_ring->next_to_use &&
427 i == tx_ring->next_to_clean)
428 printk(KERN_CONT " NTC/U\n");
429 else if (i == tx_ring->next_to_use)
430 printk(KERN_CONT " NTU\n");
431 else if (i == tx_ring->next_to_clean)
432 printk(KERN_CONT " NTC\n");
433 else
434 printk(KERN_CONT "\n");
435
436 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
437 print_hex_dump(KERN_INFO, "",
438 DUMP_PREFIX_ADDRESS,
439 16, 1, phys_to_virt(buffer_info->dma),
440 buffer_info->length, true);
441 }
442 }
443
444 /* Print RX Rings Summary */
445 rx_ring_summary:
446 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
447 printk(KERN_INFO "Queue [NTU] [NTC]\n");
448 for (n = 0; n < adapter->num_rx_queues; n++) {
449 rx_ring = adapter->rx_ring[n];
450 printk(KERN_INFO " %5d %5X %5X\n", n,
451 rx_ring->next_to_use, rx_ring->next_to_clean);
452 }
453
454 /* Print RX Rings */
455 if (!netif_msg_rx_status(adapter))
456 goto exit;
457
458 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
459
460 /* Advanced Receive Descriptor (Read) Format
461 * 63 1 0
462 * +-----------------------------------------------------+
463 * 0 | Packet Buffer Address [63:1] |A0/NSE|
464 * +----------------------------------------------+------+
465 * 8 | Header Buffer Address [63:1] | DD |
466 * +-----------------------------------------------------+
467 *
468 *
469 * Advanced Receive Descriptor (Write-Back) Format
470 *
471 * 63 48 47 32 31 30 21 20 17 16 4 3 0
472 * +------------------------------------------------------+
473 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
474 * | Checksum Ident | | | | Type | Type |
475 * +------------------------------------------------------+
476 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
477 * +------------------------------------------------------+
478 * 63 48 47 32 31 20 19 0
479 */
480
481 for (n = 0; n < adapter->num_rx_queues; n++) {
482 rx_ring = adapter->rx_ring[n];
483 printk(KERN_INFO "------------------------------------\n");
484 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
485 printk(KERN_INFO "------------------------------------\n");
486 printk(KERN_INFO "R [desc] [ PktBuf A0] "
487 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
488 "<-- Adv Rx Read format\n");
489 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
490 "[vl er S cks ln] ---------------- [bi->skb] "
491 "<-- Adv Rx Write-Back format\n");
492
493 for (i = 0; i < rx_ring->count; i++) {
494 buffer_info = &rx_ring->buffer_info[i];
495 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
496 u0 = (struct my_u0 *)rx_desc;
497 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
498 if (staterr & E1000_RXD_STAT_DD) {
499 /* Descriptor Done */
500 printk(KERN_INFO "RWB[0x%03X] %016llX "
501 "%016llX ---------------- %p", i,
502 le64_to_cpu(u0->a),
503 le64_to_cpu(u0->b),
504 buffer_info->skb);
505 } else {
506 printk(KERN_INFO "R [0x%03X] %016llX "
507 "%016llX %016llX %p", i,
508 le64_to_cpu(u0->a),
509 le64_to_cpu(u0->b),
510 (u64)buffer_info->dma,
511 buffer_info->skb);
512
513 if (netif_msg_pktdata(adapter)) {
514 print_hex_dump(KERN_INFO, "",
515 DUMP_PREFIX_ADDRESS,
516 16, 1,
517 phys_to_virt(buffer_info->dma),
518 rx_ring->rx_buffer_len, true);
519 if (rx_ring->rx_buffer_len
520 < IGB_RXBUFFER_1024)
521 print_hex_dump(KERN_INFO, "",
522 DUMP_PREFIX_ADDRESS,
523 16, 1,
524 phys_to_virt(
525 buffer_info->page_dma +
526 buffer_info->page_offset),
527 PAGE_SIZE/2, true);
528 }
529 }
530
531 if (i == rx_ring->next_to_use)
532 printk(KERN_CONT " NTU\n");
533 else if (i == rx_ring->next_to_clean)
534 printk(KERN_CONT " NTC\n");
535 else
536 printk(KERN_CONT "\n");
537
538 }
539 }
540
541 exit:
542 return;
543 }
544
545
546 /**
547 * igb_read_clock - read raw cycle counter (to be used by time counter)
548 */
549 static cycle_t igb_read_clock(const struct cyclecounter *tc)
550 {
551 struct igb_adapter *adapter =
552 container_of(tc, struct igb_adapter, cycles);
553 struct e1000_hw *hw = &adapter->hw;
554 u64 stamp = 0;
555 int shift = 0;
556
557 /*
558 * The timestamp latches on lowest register read. For the 82580
559 * the lowest register is SYSTIMR instead of SYSTIML. However we never
560 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
561 */
562 if (hw->mac.type == e1000_82580) {
563 stamp = rd32(E1000_SYSTIMR) >> 8;
564 shift = IGB_82580_TSYNC_SHIFT;
565 }
566
567 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
568 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
569 return stamp;
570 }
571
572 /**
573 * igb_get_hw_dev - return device
574 * used by hardware layer to print debugging information
575 **/
576 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577 {
578 struct igb_adapter *adapter = hw->back;
579 return adapter->netdev;
580 }
581
582 /**
583 * igb_init_module - Driver Registration Routine
584 *
585 * igb_init_module is the first routine called when the driver is
586 * loaded. All it does is register with the PCI subsystem.
587 **/
588 static int __init igb_init_module(void)
589 {
590 int ret;
591 printk(KERN_INFO "%s - version %s\n",
592 igb_driver_string, igb_driver_version);
593
594 printk(KERN_INFO "%s\n", igb_copyright);
595
596 #ifdef CONFIG_IGB_DCA
597 dca_register_notify(&dca_notifier);
598 #endif
599 ret = pci_register_driver(&igb_driver);
600 return ret;
601 }
602
603 module_init(igb_init_module);
604
605 /**
606 * igb_exit_module - Driver Exit Cleanup Routine
607 *
608 * igb_exit_module is called just before the driver is removed
609 * from memory.
610 **/
611 static void __exit igb_exit_module(void)
612 {
613 #ifdef CONFIG_IGB_DCA
614 dca_unregister_notify(&dca_notifier);
615 #endif
616 pci_unregister_driver(&igb_driver);
617 }
618
619 module_exit(igb_exit_module);
620
621 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
622 /**
623 * igb_cache_ring_register - Descriptor ring to register mapping
624 * @adapter: board private structure to initialize
625 *
626 * Once we know the feature-set enabled for the device, we'll cache
627 * the register offset the descriptor ring is assigned to.
628 **/
629 static void igb_cache_ring_register(struct igb_adapter *adapter)
630 {
631 int i = 0, j = 0;
632 u32 rbase_offset = adapter->vfs_allocated_count;
633
634 switch (adapter->hw.mac.type) {
635 case e1000_82576:
636 /* The queues are allocated for virtualization such that VF 0
637 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638 * In order to avoid collision we start at the first free queue
639 * and continue consuming queues in the same sequence
640 */
641 if (adapter->vfs_allocated_count) {
642 for (; i < adapter->rss_queues; i++)
643 adapter->rx_ring[i]->reg_idx = rbase_offset +
644 Q_IDX_82576(i);
645 }
646 case e1000_82575:
647 case e1000_82580:
648 case e1000_i350:
649 default:
650 for (; i < adapter->num_rx_queues; i++)
651 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
652 for (; j < adapter->num_tx_queues; j++)
653 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
654 break;
655 }
656 }
657
658 static void igb_free_queues(struct igb_adapter *adapter)
659 {
660 int i;
661
662 for (i = 0; i < adapter->num_tx_queues; i++) {
663 kfree(adapter->tx_ring[i]);
664 adapter->tx_ring[i] = NULL;
665 }
666 for (i = 0; i < adapter->num_rx_queues; i++) {
667 kfree(adapter->rx_ring[i]);
668 adapter->rx_ring[i] = NULL;
669 }
670 adapter->num_rx_queues = 0;
671 adapter->num_tx_queues = 0;
672 }
673
674 /**
675 * igb_alloc_queues - Allocate memory for all rings
676 * @adapter: board private structure to initialize
677 *
678 * We allocate one ring per queue at run-time since we don't know the
679 * number of queues at compile-time.
680 **/
681 static int igb_alloc_queues(struct igb_adapter *adapter)
682 {
683 struct igb_ring *ring;
684 int i;
685
686 for (i = 0; i < adapter->num_tx_queues; i++) {
687 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
688 if (!ring)
689 goto err;
690 ring->count = adapter->tx_ring_count;
691 ring->queue_index = i;
692 ring->dev = &adapter->pdev->dev;
693 ring->netdev = adapter->netdev;
694 /* For 82575, context index must be unique per ring. */
695 if (adapter->hw.mac.type == e1000_82575)
696 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
697 adapter->tx_ring[i] = ring;
698 }
699
700 for (i = 0; i < adapter->num_rx_queues; i++) {
701 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702 if (!ring)
703 goto err;
704 ring->count = adapter->rx_ring_count;
705 ring->queue_index = i;
706 ring->dev = &adapter->pdev->dev;
707 ring->netdev = adapter->netdev;
708 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
709 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
710 /* set flag indicating ring supports SCTP checksum offload */
711 if (adapter->hw.mac.type >= e1000_82576)
712 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
713 adapter->rx_ring[i] = ring;
714 }
715
716 igb_cache_ring_register(adapter);
717
718 return 0;
719
720 err:
721 igb_free_queues(adapter);
722
723 return -ENOMEM;
724 }
725
726 #define IGB_N0_QUEUE -1
727 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
728 {
729 u32 msixbm = 0;
730 struct igb_adapter *adapter = q_vector->adapter;
731 struct e1000_hw *hw = &adapter->hw;
732 u32 ivar, index;
733 int rx_queue = IGB_N0_QUEUE;
734 int tx_queue = IGB_N0_QUEUE;
735
736 if (q_vector->rx_ring)
737 rx_queue = q_vector->rx_ring->reg_idx;
738 if (q_vector->tx_ring)
739 tx_queue = q_vector->tx_ring->reg_idx;
740
741 switch (hw->mac.type) {
742 case e1000_82575:
743 /* The 82575 assigns vectors using a bitmask, which matches the
744 bitmask for the EICR/EIMS/EIMC registers. To assign one
745 or more queues to a vector, we write the appropriate bits
746 into the MSIXBM register for that vector. */
747 if (rx_queue > IGB_N0_QUEUE)
748 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
749 if (tx_queue > IGB_N0_QUEUE)
750 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
751 if (!adapter->msix_entries && msix_vector == 0)
752 msixbm |= E1000_EIMS_OTHER;
753 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
754 q_vector->eims_value = msixbm;
755 break;
756 case e1000_82576:
757 /* 82576 uses a table-based method for assigning vectors.
758 Each queue has a single entry in the table to which we write
759 a vector number along with a "valid" bit. Sadly, the layout
760 of the table is somewhat counterintuitive. */
761 if (rx_queue > IGB_N0_QUEUE) {
762 index = (rx_queue & 0x7);
763 ivar = array_rd32(E1000_IVAR0, index);
764 if (rx_queue < 8) {
765 /* vector goes into low byte of register */
766 ivar = ivar & 0xFFFFFF00;
767 ivar |= msix_vector | E1000_IVAR_VALID;
768 } else {
769 /* vector goes into third byte of register */
770 ivar = ivar & 0xFF00FFFF;
771 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
772 }
773 array_wr32(E1000_IVAR0, index, ivar);
774 }
775 if (tx_queue > IGB_N0_QUEUE) {
776 index = (tx_queue & 0x7);
777 ivar = array_rd32(E1000_IVAR0, index);
778 if (tx_queue < 8) {
779 /* vector goes into second byte of register */
780 ivar = ivar & 0xFFFF00FF;
781 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
782 } else {
783 /* vector goes into high byte of register */
784 ivar = ivar & 0x00FFFFFF;
785 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
786 }
787 array_wr32(E1000_IVAR0, index, ivar);
788 }
789 q_vector->eims_value = 1 << msix_vector;
790 break;
791 case e1000_82580:
792 case e1000_i350:
793 /* 82580 uses the same table-based approach as 82576 but has fewer
794 entries as a result we carry over for queues greater than 4. */
795 if (rx_queue > IGB_N0_QUEUE) {
796 index = (rx_queue >> 1);
797 ivar = array_rd32(E1000_IVAR0, index);
798 if (rx_queue & 0x1) {
799 /* vector goes into third byte of register */
800 ivar = ivar & 0xFF00FFFF;
801 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
802 } else {
803 /* vector goes into low byte of register */
804 ivar = ivar & 0xFFFFFF00;
805 ivar |= msix_vector | E1000_IVAR_VALID;
806 }
807 array_wr32(E1000_IVAR0, index, ivar);
808 }
809 if (tx_queue > IGB_N0_QUEUE) {
810 index = (tx_queue >> 1);
811 ivar = array_rd32(E1000_IVAR0, index);
812 if (tx_queue & 0x1) {
813 /* vector goes into high byte of register */
814 ivar = ivar & 0x00FFFFFF;
815 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
816 } else {
817 /* vector goes into second byte of register */
818 ivar = ivar & 0xFFFF00FF;
819 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
820 }
821 array_wr32(E1000_IVAR0, index, ivar);
822 }
823 q_vector->eims_value = 1 << msix_vector;
824 break;
825 default:
826 BUG();
827 break;
828 }
829
830 /* add q_vector eims value to global eims_enable_mask */
831 adapter->eims_enable_mask |= q_vector->eims_value;
832
833 /* configure q_vector to set itr on first interrupt */
834 q_vector->set_itr = 1;
835 }
836
837 /**
838 * igb_configure_msix - Configure MSI-X hardware
839 *
840 * igb_configure_msix sets up the hardware to properly
841 * generate MSI-X interrupts.
842 **/
843 static void igb_configure_msix(struct igb_adapter *adapter)
844 {
845 u32 tmp;
846 int i, vector = 0;
847 struct e1000_hw *hw = &adapter->hw;
848
849 adapter->eims_enable_mask = 0;
850
851 /* set vector for other causes, i.e. link changes */
852 switch (hw->mac.type) {
853 case e1000_82575:
854 tmp = rd32(E1000_CTRL_EXT);
855 /* enable MSI-X PBA support*/
856 tmp |= E1000_CTRL_EXT_PBA_CLR;
857
858 /* Auto-Mask interrupts upon ICR read. */
859 tmp |= E1000_CTRL_EXT_EIAME;
860 tmp |= E1000_CTRL_EXT_IRCA;
861
862 wr32(E1000_CTRL_EXT, tmp);
863
864 /* enable msix_other interrupt */
865 array_wr32(E1000_MSIXBM(0), vector++,
866 E1000_EIMS_OTHER);
867 adapter->eims_other = E1000_EIMS_OTHER;
868
869 break;
870
871 case e1000_82576:
872 case e1000_82580:
873 case e1000_i350:
874 /* Turn on MSI-X capability first, or our settings
875 * won't stick. And it will take days to debug. */
876 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
877 E1000_GPIE_PBA | E1000_GPIE_EIAME |
878 E1000_GPIE_NSICR);
879
880 /* enable msix_other interrupt */
881 adapter->eims_other = 1 << vector;
882 tmp = (vector++ | E1000_IVAR_VALID) << 8;
883
884 wr32(E1000_IVAR_MISC, tmp);
885 break;
886 default:
887 /* do nothing, since nothing else supports MSI-X */
888 break;
889 } /* switch (hw->mac.type) */
890
891 adapter->eims_enable_mask |= adapter->eims_other;
892
893 for (i = 0; i < adapter->num_q_vectors; i++)
894 igb_assign_vector(adapter->q_vector[i], vector++);
895
896 wrfl();
897 }
898
899 /**
900 * igb_request_msix - Initialize MSI-X interrupts
901 *
902 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
903 * kernel.
904 **/
905 static int igb_request_msix(struct igb_adapter *adapter)
906 {
907 struct net_device *netdev = adapter->netdev;
908 struct e1000_hw *hw = &adapter->hw;
909 int i, err = 0, vector = 0;
910
911 err = request_irq(adapter->msix_entries[vector].vector,
912 igb_msix_other, 0, netdev->name, adapter);
913 if (err)
914 goto out;
915 vector++;
916
917 for (i = 0; i < adapter->num_q_vectors; i++) {
918 struct igb_q_vector *q_vector = adapter->q_vector[i];
919
920 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
921
922 if (q_vector->rx_ring && q_vector->tx_ring)
923 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
924 q_vector->rx_ring->queue_index);
925 else if (q_vector->tx_ring)
926 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
927 q_vector->tx_ring->queue_index);
928 else if (q_vector->rx_ring)
929 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
930 q_vector->rx_ring->queue_index);
931 else
932 sprintf(q_vector->name, "%s-unused", netdev->name);
933
934 err = request_irq(adapter->msix_entries[vector].vector,
935 igb_msix_ring, 0, q_vector->name,
936 q_vector);
937 if (err)
938 goto out;
939 vector++;
940 }
941
942 igb_configure_msix(adapter);
943 return 0;
944 out:
945 return err;
946 }
947
948 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
949 {
950 if (adapter->msix_entries) {
951 pci_disable_msix(adapter->pdev);
952 kfree(adapter->msix_entries);
953 adapter->msix_entries = NULL;
954 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
955 pci_disable_msi(adapter->pdev);
956 }
957 }
958
959 /**
960 * igb_free_q_vectors - Free memory allocated for interrupt vectors
961 * @adapter: board private structure to initialize
962 *
963 * This function frees the memory allocated to the q_vectors. In addition if
964 * NAPI is enabled it will delete any references to the NAPI struct prior
965 * to freeing the q_vector.
966 **/
967 static void igb_free_q_vectors(struct igb_adapter *adapter)
968 {
969 int v_idx;
970
971 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
972 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
973 adapter->q_vector[v_idx] = NULL;
974 if (!q_vector)
975 continue;
976 netif_napi_del(&q_vector->napi);
977 kfree(q_vector);
978 }
979 adapter->num_q_vectors = 0;
980 }
981
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Tears down the interrupt scheme in three steps: the queues are freed, then
 * the q_vectors, and finally MSI-X/MSI is disabled on the device.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first, then the vectors that referenced them */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);

	/* finally release the MSI-X/MSI capability itself */
	igb_reset_interrupt_capability(adapter);
}
994
995 /**
996 * igb_set_interrupt_capability - set MSI or MSI-X if supported
997 *
998 * Attempt to configure interrupts using the best available
999 * capabilities of the hardware and kernel.
1000 **/
1001 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1002 {
1003 int err;
1004 int numvecs, i;
1005
1006 /* Number of supported queues. */
1007 adapter->num_rx_queues = adapter->rss_queues;
1008 if (adapter->vfs_allocated_count)
1009 adapter->num_tx_queues = 1;
1010 else
1011 adapter->num_tx_queues = adapter->rss_queues;
1012
1013 /* start with one vector for every rx queue */
1014 numvecs = adapter->num_rx_queues;
1015
1016 /* if tx handler is separate add 1 for every tx queue */
1017 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1018 numvecs += adapter->num_tx_queues;
1019
1020 /* store the number of vectors reserved for queues */
1021 adapter->num_q_vectors = numvecs;
1022
1023 /* add 1 vector for link status interrupts */
1024 numvecs++;
1025 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1026 GFP_KERNEL);
1027 if (!adapter->msix_entries)
1028 goto msi_only;
1029
1030 for (i = 0; i < numvecs; i++)
1031 adapter->msix_entries[i].entry = i;
1032
1033 err = pci_enable_msix(adapter->pdev,
1034 adapter->msix_entries,
1035 numvecs);
1036 if (err == 0)
1037 goto out;
1038
1039 igb_reset_interrupt_capability(adapter);
1040
1041 /* If we can't do MSI-X, try MSI */
1042 msi_only:
1043 #ifdef CONFIG_PCI_IOV
1044 /* disable SR-IOV for non MSI-X configurations */
1045 if (adapter->vf_data) {
1046 struct e1000_hw *hw = &adapter->hw;
1047 /* disable iov and allow time for transactions to clear */
1048 pci_disable_sriov(adapter->pdev);
1049 msleep(500);
1050
1051 kfree(adapter->vf_data);
1052 adapter->vf_data = NULL;
1053 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1054 msleep(100);
1055 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1056 }
1057 #endif
1058 adapter->vfs_allocated_count = 0;
1059 adapter->rss_queues = 1;
1060 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1061 adapter->num_rx_queues = 1;
1062 adapter->num_tx_queues = 1;
1063 adapter->num_q_vectors = 1;
1064 if (!pci_enable_msi(adapter->pdev))
1065 adapter->flags |= IGB_FLAG_HAS_MSI;
1066 out:
1067 /* Notify the stack of the (possibly) reduced queue counts. */
1068 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1069 return netif_set_real_num_rx_queues(adapter->netdev,
1070 adapter->num_rx_queues);
1071 }
1072
1073 /**
1074 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1075 * @adapter: board private structure to initialize
1076 *
1077 * We allocate one q_vector per queue interrupt. If allocation fails we
1078 * return -ENOMEM.
1079 **/
1080 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1081 {
1082 struct igb_q_vector *q_vector;
1083 struct e1000_hw *hw = &adapter->hw;
1084 int v_idx;
1085
1086 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1087 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1088 if (!q_vector)
1089 goto err_out;
1090 q_vector->adapter = adapter;
1091 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1092 q_vector->itr_val = IGB_START_ITR;
1093 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1094 adapter->q_vector[v_idx] = q_vector;
1095 }
1096 return 0;
1097
1098 err_out:
1099 igb_free_q_vectors(adapter);
1100 return -ENOMEM;
1101 }
1102
1103 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1104 int ring_idx, int v_idx)
1105 {
1106 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1107
1108 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1109 q_vector->rx_ring->q_vector = q_vector;
1110 q_vector->itr_val = adapter->rx_itr_setting;
1111 if (q_vector->itr_val && q_vector->itr_val <= 3)
1112 q_vector->itr_val = IGB_START_ITR;
1113 }
1114
1115 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1116 int ring_idx, int v_idx)
1117 {
1118 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1119
1120 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1121 q_vector->tx_ring->q_vector = q_vector;
1122 q_vector->itr_val = adapter->tx_itr_setting;
1123 if (q_vector->itr_val && q_vector->itr_val <= 3)
1124 q_vector->itr_val = IGB_START_ITR;
1125 }
1126
1127 /**
1128 * igb_map_ring_to_vector - maps allocated queues to vectors
1129 *
1130 * This function maps the recently allocated queues to vectors.
1131 **/
1132 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1133 {
1134 int i;
1135 int v_idx = 0;
1136
1137 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1138 (adapter->num_q_vectors < adapter->num_tx_queues))
1139 return -ENOMEM;
1140
1141 if (adapter->num_q_vectors >=
1142 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1143 for (i = 0; i < adapter->num_rx_queues; i++)
1144 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1145 for (i = 0; i < adapter->num_tx_queues; i++)
1146 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147 } else {
1148 for (i = 0; i < adapter->num_rx_queues; i++) {
1149 if (i < adapter->num_tx_queues)
1150 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1151 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1152 }
1153 for (; i < adapter->num_tx_queues; i++)
1154 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1155 }
1156 return 0;
1157 }
1158
1159 /**
1160 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1161 *
1162 * This function initializes the interrupts and allocates all of the queues.
1163 **/
1164 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1165 {
1166 struct pci_dev *pdev = adapter->pdev;
1167 int err;
1168
1169 err = igb_set_interrupt_capability(adapter);
1170 if (err)
1171 return err;
1172
1173 err = igb_alloc_q_vectors(adapter);
1174 if (err) {
1175 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1176 goto err_alloc_q_vectors;
1177 }
1178
1179 err = igb_alloc_queues(adapter);
1180 if (err) {
1181 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1182 goto err_alloc_queues;
1183 }
1184
1185 err = igb_map_ring_to_vector(adapter);
1186 if (err) {
1187 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1188 goto err_map_queues;
1189 }
1190
1191
1192 return 0;
1193 err_map_queues:
1194 igb_free_queues(adapter);
1195 err_alloc_queues:
1196 igb_free_q_vectors(adapter);
1197 err_alloc_q_vectors:
1198 igb_reset_interrupt_capability(adapter);
1199 return err;
1200 }
1201
1202 /**
1203 * igb_request_irq - initialize interrupts
1204 *
1205 * Attempts to configure interrupts using the best available
1206 * capabilities of the hardware and kernel.
1207 **/
1208 static int igb_request_irq(struct igb_adapter *adapter)
1209 {
1210 struct net_device *netdev = adapter->netdev;
1211 struct pci_dev *pdev = adapter->pdev;
1212 int err = 0;
1213
1214 if (adapter->msix_entries) {
1215 err = igb_request_msix(adapter);
1216 if (!err)
1217 goto request_done;
1218 /* fall back to MSI */
1219 igb_clear_interrupt_scheme(adapter);
1220 if (!pci_enable_msi(adapter->pdev))
1221 adapter->flags |= IGB_FLAG_HAS_MSI;
1222 igb_free_all_tx_resources(adapter);
1223 igb_free_all_rx_resources(adapter);
1224 adapter->num_tx_queues = 1;
1225 adapter->num_rx_queues = 1;
1226 adapter->num_q_vectors = 1;
1227 err = igb_alloc_q_vectors(adapter);
1228 if (err) {
1229 dev_err(&pdev->dev,
1230 "Unable to allocate memory for vectors\n");
1231 goto request_done;
1232 }
1233 err = igb_alloc_queues(adapter);
1234 if (err) {
1235 dev_err(&pdev->dev,
1236 "Unable to allocate memory for queues\n");
1237 igb_free_q_vectors(adapter);
1238 goto request_done;
1239 }
1240 igb_setup_all_tx_resources(adapter);
1241 igb_setup_all_rx_resources(adapter);
1242 } else {
1243 igb_assign_vector(adapter->q_vector[0], 0);
1244 }
1245
1246 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1247 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1248 netdev->name, adapter);
1249 if (!err)
1250 goto request_done;
1251
1252 /* fall back to legacy interrupts */
1253 igb_reset_interrupt_capability(adapter);
1254 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1255 }
1256
1257 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1258 netdev->name, adapter);
1259
1260 if (err)
1261 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1262 err);
1263
1264 request_done:
1265 return err;
1266 }
1267
1268 static void igb_free_irq(struct igb_adapter *adapter)
1269 {
1270 if (adapter->msix_entries) {
1271 int vector = 0, i;
1272
1273 free_irq(adapter->msix_entries[vector++].vector, adapter);
1274
1275 for (i = 0; i < adapter->num_q_vectors; i++) {
1276 struct igb_q_vector *q_vector = adapter->q_vector[i];
1277 free_irq(adapter->msix_entries[vector++].vector,
1278 q_vector);
1279 }
1280 } else {
1281 free_irq(adapter->pdev->irq, adapter);
1282 }
1283 }
1284
1285 /**
1286 * igb_irq_disable - Mask off interrupt generation on the NIC
1287 * @adapter: board private structure
1288 **/
1289 static void igb_irq_disable(struct igb_adapter *adapter)
1290 {
1291 struct e1000_hw *hw = &adapter->hw;
1292
1293 /*
1294 * we need to be careful when disabling interrupts. The VFs are also
1295 * mapped into these registers and so clearing the bits can cause
1296 * issues on the VF drivers so we only need to clear what we set
1297 */
1298 if (adapter->msix_entries) {
1299 u32 regval = rd32(E1000_EIAM);
1300 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1301 wr32(E1000_EIMC, adapter->eims_enable_mask);
1302 regval = rd32(E1000_EIAC);
1303 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1304 }
1305
1306 wr32(E1000_IAM, 0);
1307 wr32(E1000_IMC, ~0);
1308 wrfl();
1309 if (adapter->msix_entries) {
1310 int i;
1311 for (i = 0; i < adapter->num_q_vectors; i++)
1312 synchronize_irq(adapter->msix_entries[i].vector);
1313 } else {
1314 synchronize_irq(adapter->pdev->irq);
1315 }
1316 }
1317
1318 /**
1319 * igb_irq_enable - Enable default interrupt generation settings
1320 * @adapter: board private structure
1321 **/
1322 static void igb_irq_enable(struct igb_adapter *adapter)
1323 {
1324 struct e1000_hw *hw = &adapter->hw;
1325
1326 if (adapter->msix_entries) {
1327 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1328 u32 regval = rd32(E1000_EIAC);
1329 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1330 regval = rd32(E1000_EIAM);
1331 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1332 wr32(E1000_EIMS, adapter->eims_enable_mask);
1333 if (adapter->vfs_allocated_count) {
1334 wr32(E1000_MBVFIMR, 0xFF);
1335 ims |= E1000_IMS_VMMB;
1336 }
1337 if (adapter->hw.mac.type == e1000_82580)
1338 ims |= E1000_IMS_DRSTA;
1339
1340 wr32(E1000_IMS, ims);
1341 } else {
1342 wr32(E1000_IMS, IMS_ENABLE_MASK |
1343 E1000_IMS_DRSTA);
1344 wr32(E1000_IAM, IMS_ENABLE_MASK |
1345 E1000_IMS_DRSTA);
1346 }
1347 }
1348
1349 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1350 {
1351 struct e1000_hw *hw = &adapter->hw;
1352 u16 vid = adapter->hw.mng_cookie.vlan_id;
1353 u16 old_vid = adapter->mng_vlan_id;
1354
1355 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1356 /* add VID to filter table */
1357 igb_vfta_set(hw, vid, true);
1358 adapter->mng_vlan_id = vid;
1359 } else {
1360 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1361 }
1362
1363 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1364 (vid != old_vid) &&
1365 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1366 /* remove VID from filter table */
1367 igb_vfta_set(hw, old_vid, false);
1368 }
1369 }
1370
1371 /**
1372 * igb_release_hw_control - release control of the h/w to f/w
1373 * @adapter: address of board private structure
1374 *
1375 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1376 * For ASF and Pass Through versions of f/w this means that the
1377 * driver is no longer loaded.
1378 *
1379 **/
1380 static void igb_release_hw_control(struct igb_adapter *adapter)
1381 {
1382 struct e1000_hw *hw = &adapter->hw;
1383 u32 ctrl_ext;
1384
1385 /* Let firmware take over control of h/w */
1386 ctrl_ext = rd32(E1000_CTRL_EXT);
1387 wr32(E1000_CTRL_EXT,
1388 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1389 }
1390
1391 /**
1392 * igb_get_hw_control - get control of the h/w from f/w
1393 * @adapter: address of board private structure
1394 *
1395 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1396 * For ASF and Pass Through versions of f/w this means that
1397 * the driver is loaded.
1398 *
1399 **/
1400 static void igb_get_hw_control(struct igb_adapter *adapter)
1401 {
1402 struct e1000_hw *hw = &adapter->hw;
1403 u32 ctrl_ext;
1404
1405 /* Let firmware know the driver has taken over */
1406 ctrl_ext = rd32(E1000_CTRL_EXT);
1407 wr32(E1000_CTRL_EXT,
1408 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1409 }
1410
1411 /**
1412 * igb_configure - configure the hardware for RX and TX
1413 * @adapter: private board structure
1414 **/
1415 static void igb_configure(struct igb_adapter *adapter)
1416 {
1417 struct net_device *netdev = adapter->netdev;
1418 int i;
1419
1420 igb_get_hw_control(adapter);
1421 igb_set_rx_mode(netdev);
1422
1423 igb_restore_vlan(adapter);
1424
1425 igb_setup_tctl(adapter);
1426 igb_setup_mrqc(adapter);
1427 igb_setup_rctl(adapter);
1428
1429 igb_configure_tx(adapter);
1430 igb_configure_rx(adapter);
1431
1432 igb_rx_fifo_flush_82575(&adapter->hw);
1433
1434 /* call igb_desc_unused which always leaves
1435 * at least 1 descriptor unused to make sure
1436 * next_to_use != next_to_clean */
1437 for (i = 0; i < adapter->num_rx_queues; i++) {
1438 struct igb_ring *ring = adapter->rx_ring[i];
1439 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1440 }
1441 }
1442
1443 /**
1444 * igb_power_up_link - Power up the phy/serdes link
1445 * @adapter: address of board private structure
1446 **/
1447 void igb_power_up_link(struct igb_adapter *adapter)
1448 {
1449 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1450 igb_power_up_phy_copper(&adapter->hw);
1451 else
1452 igb_power_up_serdes_link_82575(&adapter->hw);
1453 }
1454
1455 /**
1456 * igb_power_down_link - Power down the phy/serdes link
1457 * @adapter: address of board private structure
1458 */
1459 static void igb_power_down_link(struct igb_adapter *adapter)
1460 {
1461 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1462 igb_power_down_phy_copper_82575(&adapter->hw);
1463 else
1464 igb_shutdown_serdes_link_82575(&adapter->hw);
1465 }
1466
1467 /**
1468 * igb_up - Open the interface and prepare it to handle traffic
1469 * @adapter: board private structure
1470 **/
1471 int igb_up(struct igb_adapter *adapter)
1472 {
1473 struct e1000_hw *hw = &adapter->hw;
1474 int i;
1475
1476 /* hardware has been reset, we need to reload some things */
1477 igb_configure(adapter);
1478
1479 clear_bit(__IGB_DOWN, &adapter->state);
1480
1481 for (i = 0; i < adapter->num_q_vectors; i++) {
1482 struct igb_q_vector *q_vector = adapter->q_vector[i];
1483 napi_enable(&q_vector->napi);
1484 }
1485 if (adapter->msix_entries)
1486 igb_configure_msix(adapter);
1487 else
1488 igb_assign_vector(adapter->q_vector[0], 0);
1489
1490 /* Clear any pending interrupts. */
1491 rd32(E1000_ICR);
1492 igb_irq_enable(adapter);
1493
1494 /* notify VFs that reset has been completed */
1495 if (adapter->vfs_allocated_count) {
1496 u32 reg_data = rd32(E1000_CTRL_EXT);
1497 reg_data |= E1000_CTRL_EXT_PFRSTD;
1498 wr32(E1000_CTRL_EXT, reg_data);
1499 }
1500
1501 netif_tx_start_all_queues(adapter->netdev);
1502
1503 /* start the watchdog. */
1504 hw->mac.get_link_status = 1;
1505 schedule_work(&adapter->watchdog_task);
1506
1507 return 0;
1508 }
1509
1510 void igb_down(struct igb_adapter *adapter)
1511 {
1512 struct net_device *netdev = adapter->netdev;
1513 struct e1000_hw *hw = &adapter->hw;
1514 u32 tctl, rctl;
1515 int i;
1516
1517 /* signal that we're down so the interrupt handler does not
1518 * reschedule our watchdog timer */
1519 set_bit(__IGB_DOWN, &adapter->state);
1520
1521 /* disable receives in the hardware */
1522 rctl = rd32(E1000_RCTL);
1523 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1524 /* flush and sleep below */
1525
1526 netif_tx_stop_all_queues(netdev);
1527
1528 /* disable transmits in the hardware */
1529 tctl = rd32(E1000_TCTL);
1530 tctl &= ~E1000_TCTL_EN;
1531 wr32(E1000_TCTL, tctl);
1532 /* flush both disables and wait for them to finish */
1533 wrfl();
1534 msleep(10);
1535
1536 for (i = 0; i < adapter->num_q_vectors; i++) {
1537 struct igb_q_vector *q_vector = adapter->q_vector[i];
1538 napi_disable(&q_vector->napi);
1539 }
1540
1541 igb_irq_disable(adapter);
1542
1543 del_timer_sync(&adapter->watchdog_timer);
1544 del_timer_sync(&adapter->phy_info_timer);
1545
1546 netif_carrier_off(netdev);
1547
1548 /* record the stats before reset*/
1549 spin_lock(&adapter->stats64_lock);
1550 igb_update_stats(adapter, &adapter->stats64);
1551 spin_unlock(&adapter->stats64_lock);
1552
1553 adapter->link_speed = 0;
1554 adapter->link_duplex = 0;
1555
1556 if (!pci_channel_offline(adapter->pdev))
1557 igb_reset(adapter);
1558 igb_clean_all_tx_rings(adapter);
1559 igb_clean_all_rx_rings(adapter);
1560 #ifdef CONFIG_IGB_DCA
1561
1562 /* since we reset the hardware DCA settings were cleared */
1563 igb_setup_dca(adapter);
1564 #endif
1565 }
1566
1567 void igb_reinit_locked(struct igb_adapter *adapter)
1568 {
1569 WARN_ON(in_interrupt());
1570 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1571 msleep(1);
1572 igb_down(adapter);
1573 igb_up(adapter);
1574 clear_bit(__IGB_RESETTING, &adapter->state);
1575 }
1576
1577 void igb_reset(struct igb_adapter *adapter)
1578 {
1579 struct pci_dev *pdev = adapter->pdev;
1580 struct e1000_hw *hw = &adapter->hw;
1581 struct e1000_mac_info *mac = &hw->mac;
1582 struct e1000_fc_info *fc = &hw->fc;
1583 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1584 u16 hwm;
1585
1586 /* Repartition Pba for greater than 9k mtu
1587 * To take effect CTRL.RST is required.
1588 */
1589 switch (mac->type) {
1590 case e1000_i350:
1591 case e1000_82580:
1592 pba = rd32(E1000_RXPBS);
1593 pba = igb_rxpbs_adjust_82580(pba);
1594 break;
1595 case e1000_82576:
1596 pba = rd32(E1000_RXPBS);
1597 pba &= E1000_RXPBS_SIZE_MASK_82576;
1598 break;
1599 case e1000_82575:
1600 default:
1601 pba = E1000_PBA_34K;
1602 break;
1603 }
1604
1605 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1606 (mac->type < e1000_82576)) {
1607 /* adjust PBA for jumbo frames */
1608 wr32(E1000_PBA, pba);
1609
1610 /* To maintain wire speed transmits, the Tx FIFO should be
1611 * large enough to accommodate two full transmit packets,
1612 * rounded up to the next 1KB and expressed in KB. Likewise,
1613 * the Rx FIFO should be large enough to accommodate at least
1614 * one full receive packet and is similarly rounded up and
1615 * expressed in KB. */
1616 pba = rd32(E1000_PBA);
1617 /* upper 16 bits has Tx packet buffer allocation size in KB */
1618 tx_space = pba >> 16;
1619 /* lower 16 bits has Rx packet buffer allocation size in KB */
1620 pba &= 0xffff;
1621 /* the tx fifo also stores 16 bytes of information about the tx
1622 * but don't include ethernet FCS because hardware appends it */
1623 min_tx_space = (adapter->max_frame_size +
1624 sizeof(union e1000_adv_tx_desc) -
1625 ETH_FCS_LEN) * 2;
1626 min_tx_space = ALIGN(min_tx_space, 1024);
1627 min_tx_space >>= 10;
1628 /* software strips receive CRC, so leave room for it */
1629 min_rx_space = adapter->max_frame_size;
1630 min_rx_space = ALIGN(min_rx_space, 1024);
1631 min_rx_space >>= 10;
1632
1633 /* If current Tx allocation is less than the min Tx FIFO size,
1634 * and the min Tx FIFO size is less than the current Rx FIFO
1635 * allocation, take space away from current Rx allocation */
1636 if (tx_space < min_tx_space &&
1637 ((min_tx_space - tx_space) < pba)) {
1638 pba = pba - (min_tx_space - tx_space);
1639
1640 /* if short on rx space, rx wins and must trump tx
1641 * adjustment */
1642 if (pba < min_rx_space)
1643 pba = min_rx_space;
1644 }
1645 wr32(E1000_PBA, pba);
1646 }
1647
1648 /* flow control settings */
1649 /* The high water mark must be low enough to fit one full frame
1650 * (or the size used for early receive) above it in the Rx FIFO.
1651 * Set it to the lower of:
1652 * - 90% of the Rx FIFO size, or
1653 * - the full Rx FIFO size minus one full frame */
1654 hwm = min(((pba << 10) * 9 / 10),
1655 ((pba << 10) - 2 * adapter->max_frame_size));
1656
1657 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1658 fc->low_water = fc->high_water - 16;
1659 fc->pause_time = 0xFFFF;
1660 fc->send_xon = 1;
1661 fc->current_mode = fc->requested_mode;
1662
1663 /* disable receive for all VFs and wait one second */
1664 if (adapter->vfs_allocated_count) {
1665 int i;
1666 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1667 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1668
1669 /* ping all the active vfs to let them know we are going down */
1670 igb_ping_all_vfs(adapter);
1671
1672 /* disable transmits and receives */
1673 wr32(E1000_VFRE, 0);
1674 wr32(E1000_VFTE, 0);
1675 }
1676
1677 /* Allow time for pending master requests to run */
1678 hw->mac.ops.reset_hw(hw);
1679 wr32(E1000_WUC, 0);
1680
1681 if (hw->mac.ops.init_hw(hw))
1682 dev_err(&pdev->dev, "Hardware Error\n");
1683 if (hw->mac.type > e1000_82580) {
1684 if (adapter->flags & IGB_FLAG_DMAC) {
1685 u32 reg;
1686
1687 /*
1688 * DMA Coalescing high water mark needs to be higher
1689 * than * the * Rx threshold. The Rx threshold is
1690 * currently * pba - 6, so we * should use a high water
1691 * mark of pba * - 4. */
1692 hwm = (pba - 4) << 10;
1693
1694 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1695 & E1000_DMACR_DMACTHR_MASK);
1696
1697 /* transition to L0x or L1 if available..*/
1698 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1699
1700 /* watchdog timer= +-1000 usec in 32usec intervals */
1701 reg |= (1000 >> 5);
1702 wr32(E1000_DMACR, reg);
1703
1704 /* no lower threshold to disable coalescing(smart fifb)
1705 * -UTRESH=0*/
1706 wr32(E1000_DMCRTRH, 0);
1707
1708 /* set hwm to PBA - 2 * max frame size */
1709 wr32(E1000_FCRTC, hwm);
1710
1711 /*
1712 * This sets the time to wait before requesting tran-
1713 * sition to * low power state to number of usecs needed
1714 * to receive 1 512 * byte frame at gigabit line rate
1715 */
1716 reg = rd32(E1000_DMCTLX);
1717 reg |= IGB_DMCTLX_DCFLUSH_DIS;
1718
1719 /* Delay 255 usec before entering Lx state. */
1720 reg |= 0xFF;
1721 wr32(E1000_DMCTLX, reg);
1722
1723 /* free space in Tx packet buffer to wake from DMAC */
1724 wr32(E1000_DMCTXTH,
1725 (IGB_MIN_TXPBSIZE -
1726 (IGB_TX_BUF_4096 + adapter->max_frame_size))
1727 >> 6);
1728
1729 /* make low power state decision controlled by DMAC */
1730 reg = rd32(E1000_PCIEMISC);
1731 reg |= E1000_PCIEMISC_LX_DECISION;
1732 wr32(E1000_PCIEMISC, reg);
1733 } /* end if IGB_FLAG_DMAC set */
1734 }
1735 if (hw->mac.type == e1000_82580) {
1736 u32 reg = rd32(E1000_PCIEMISC);
1737 wr32(E1000_PCIEMISC,
1738 reg & ~E1000_PCIEMISC_LX_DECISION);
1739 }
1740 if (!netif_running(adapter->netdev))
1741 igb_power_down_link(adapter);
1742
1743 igb_update_mng_vlan(adapter);
1744
1745 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747
1748 igb_get_phy_info(hw);
1749 }
1750
1751 static const struct net_device_ops igb_netdev_ops = {
1752 .ndo_open = igb_open,
1753 .ndo_stop = igb_close,
1754 .ndo_start_xmit = igb_xmit_frame_adv,
1755 .ndo_get_stats64 = igb_get_stats64,
1756 .ndo_set_rx_mode = igb_set_rx_mode,
1757 .ndo_set_multicast_list = igb_set_rx_mode,
1758 .ndo_set_mac_address = igb_set_mac,
1759 .ndo_change_mtu = igb_change_mtu,
1760 .ndo_do_ioctl = igb_ioctl,
1761 .ndo_tx_timeout = igb_tx_timeout,
1762 .ndo_validate_addr = eth_validate_addr,
1763 .ndo_vlan_rx_register = igb_vlan_rx_register,
1764 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1765 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1766 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1767 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1768 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1769 .ndo_get_vf_config = igb_ndo_get_vf_config,
1770 #ifdef CONFIG_NET_POLL_CONTROLLER
1771 .ndo_poll_controller = igb_netpoll,
1772 #endif
1773 };
1774
1775 /**
1776 * igb_probe - Device Initialization Routine
1777 * @pdev: PCI device information struct
1778 * @ent: entry in igb_pci_tbl
1779 *
1780 * Returns 0 on success, negative on failure
1781 *
1782 * igb_probe initializes an adapter identified by a pci_dev structure.
1783 * The OS initialization, configuring of the adapter private structure,
1784 * and a hardware reset occur.
1785 **/
1786 static int __devinit igb_probe(struct pci_dev *pdev,
1787 const struct pci_device_id *ent)
1788 {
1789 struct net_device *netdev;
1790 struct igb_adapter *adapter;
1791 struct e1000_hw *hw;
1792 u16 eeprom_data = 0;
1793 s32 ret_val;
1794 static int global_quad_port_a; /* global quad port a indication */
1795 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1796 unsigned long mmio_start, mmio_len;
1797 int err, pci_using_dac;
1798 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1799 u8 part_str[E1000_PBANUM_LENGTH];
1800
1801 /* Catch broken hardware that put the wrong VF device ID in
1802 * the PCIe SR-IOV capability.
1803 */
1804 if (pdev->is_virtfn) {
1805 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1806 pci_name(pdev), pdev->vendor, pdev->device);
1807 return -EINVAL;
1808 }
1809
1810 err = pci_enable_device_mem(pdev);
1811 if (err)
1812 return err;
1813
1814 pci_using_dac = 0;
1815 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1816 if (!err) {
1817 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1818 if (!err)
1819 pci_using_dac = 1;
1820 } else {
1821 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1822 if (err) {
1823 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1824 if (err) {
1825 dev_err(&pdev->dev, "No usable DMA "
1826 "configuration, aborting\n");
1827 goto err_dma;
1828 }
1829 }
1830 }
1831
1832 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1833 IORESOURCE_MEM),
1834 igb_driver_name);
1835 if (err)
1836 goto err_pci_reg;
1837
1838 pci_enable_pcie_error_reporting(pdev);
1839
1840 pci_set_master(pdev);
1841 pci_save_state(pdev);
1842
1843 err = -ENOMEM;
1844 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1845 IGB_ABS_MAX_TX_QUEUES);
1846 if (!netdev)
1847 goto err_alloc_etherdev;
1848
1849 SET_NETDEV_DEV(netdev, &pdev->dev);
1850
1851 pci_set_drvdata(pdev, netdev);
1852 adapter = netdev_priv(netdev);
1853 adapter->netdev = netdev;
1854 adapter->pdev = pdev;
1855 hw = &adapter->hw;
1856 hw->back = adapter;
1857 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1858
1859 mmio_start = pci_resource_start(pdev, 0);
1860 mmio_len = pci_resource_len(pdev, 0);
1861
1862 err = -EIO;
1863 hw->hw_addr = ioremap(mmio_start, mmio_len);
1864 if (!hw->hw_addr)
1865 goto err_ioremap;
1866
1867 netdev->netdev_ops = &igb_netdev_ops;
1868 igb_set_ethtool_ops(netdev);
1869 netdev->watchdog_timeo = 5 * HZ;
1870
1871 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1872
1873 netdev->mem_start = mmio_start;
1874 netdev->mem_end = mmio_start + mmio_len;
1875
1876 /* PCI config space info */
1877 hw->vendor_id = pdev->vendor;
1878 hw->device_id = pdev->device;
1879 hw->revision_id = pdev->revision;
1880 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1881 hw->subsystem_device_id = pdev->subsystem_device;
1882
1883 /* Copy the default MAC, PHY and NVM function pointers */
1884 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1885 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1886 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1887 /* Initialize skew-specific constants */
1888 err = ei->get_invariants(hw);
1889 if (err)
1890 goto err_sw_init;
1891
1892 /* setup the private structure */
1893 err = igb_sw_init(adapter);
1894 if (err)
1895 goto err_sw_init;
1896
1897 igb_get_bus_info_pcie(hw);
1898
1899 hw->phy.autoneg_wait_to_complete = false;
1900
1901 /* Copper options */
1902 if (hw->phy.media_type == e1000_media_type_copper) {
1903 hw->phy.mdix = AUTO_ALL_MODES;
1904 hw->phy.disable_polarity_correction = false;
1905 hw->phy.ms_type = e1000_ms_hw_default;
1906 }
1907
1908 if (igb_check_reset_block(hw))
1909 dev_info(&pdev->dev,
1910 "PHY reset is blocked due to SOL/IDER session.\n");
1911
1912 netdev->features = NETIF_F_SG |
1913 NETIF_F_IP_CSUM |
1914 NETIF_F_HW_VLAN_TX |
1915 NETIF_F_HW_VLAN_RX |
1916 NETIF_F_HW_VLAN_FILTER;
1917
1918 netdev->features |= NETIF_F_IPV6_CSUM;
1919 netdev->features |= NETIF_F_TSO;
1920 netdev->features |= NETIF_F_TSO6;
1921 netdev->features |= NETIF_F_GRO;
1922
1923 netdev->vlan_features |= NETIF_F_TSO;
1924 netdev->vlan_features |= NETIF_F_TSO6;
1925 netdev->vlan_features |= NETIF_F_IP_CSUM;
1926 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1927 netdev->vlan_features |= NETIF_F_SG;
1928
1929 if (pci_using_dac) {
1930 netdev->features |= NETIF_F_HIGHDMA;
1931 netdev->vlan_features |= NETIF_F_HIGHDMA;
1932 }
1933
1934 if (hw->mac.type >= e1000_82576)
1935 netdev->features |= NETIF_F_SCTP_CSUM;
1936
1937 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1938
1939 /* before reading the NVM, reset the controller to put the device in a
1940 * known good starting state */
1941 hw->mac.ops.reset_hw(hw);
1942
1943 /* make sure the NVM is good */
1944 if (hw->nvm.ops.validate(hw) < 0) {
1945 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1946 err = -EIO;
1947 goto err_eeprom;
1948 }
1949
1950 /* copy the MAC address out of the NVM */
1951 if (hw->mac.ops.read_mac_addr(hw))
1952 dev_err(&pdev->dev, "NVM Read Error\n");
1953
1954 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1955 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1956
1957 if (!is_valid_ether_addr(netdev->perm_addr)) {
1958 dev_err(&pdev->dev, "Invalid MAC Address\n");
1959 err = -EIO;
1960 goto err_eeprom;
1961 }
1962
1963 setup_timer(&adapter->watchdog_timer, igb_watchdog,
1964 (unsigned long) adapter);
1965 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1966 (unsigned long) adapter);
1967
1968 INIT_WORK(&adapter->reset_task, igb_reset_task);
1969 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1970
1971 /* Initialize link properties that are user-changeable */
1972 adapter->fc_autoneg = true;
1973 hw->mac.autoneg = true;
1974 hw->phy.autoneg_advertised = 0x2f;
1975
1976 hw->fc.requested_mode = e1000_fc_default;
1977 hw->fc.current_mode = e1000_fc_default;
1978
1979 igb_validate_mdi_setting(hw);
1980
1981 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1982 * enable the ACPI Magic Packet filter
1983 */
1984
1985 if (hw->bus.func == 0)
1986 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1987 else if (hw->mac.type == e1000_82580)
1988 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1989 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1990 &eeprom_data);
1991 else if (hw->bus.func == 1)
1992 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1993
1994 if (eeprom_data & eeprom_apme_mask)
1995 adapter->eeprom_wol |= E1000_WUFC_MAG;
1996
1997 /* now that we have the eeprom settings, apply the special cases where
1998 * the eeprom may be wrong or the board simply won't support wake on
1999 * lan on a particular port */
2000 switch (pdev->device) {
2001 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2002 adapter->eeprom_wol = 0;
2003 break;
2004 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2005 case E1000_DEV_ID_82576_FIBER:
2006 case E1000_DEV_ID_82576_SERDES:
2007 /* Wake events only supported on port A for dual fiber
2008 * regardless of eeprom setting */
2009 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2010 adapter->eeprom_wol = 0;
2011 break;
2012 case E1000_DEV_ID_82576_QUAD_COPPER:
2013 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2014 /* if quad port adapter, disable WoL on all but port A */
2015 if (global_quad_port_a != 0)
2016 adapter->eeprom_wol = 0;
2017 else
2018 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2019 /* Reset for multiple quad port adapters */
2020 if (++global_quad_port_a == 4)
2021 global_quad_port_a = 0;
2022 break;
2023 }
2024
2025 /* initialize the wol settings based on the eeprom settings */
2026 adapter->wol = adapter->eeprom_wol;
2027 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2028
2029 /* reset the hardware with the new settings */
2030 igb_reset(adapter);
2031
2032 /* let the f/w know that the h/w is now under the control of the
2033 * driver. */
2034 igb_get_hw_control(adapter);
2035
2036 strcpy(netdev->name, "eth%d");
2037 err = register_netdev(netdev);
2038 if (err)
2039 goto err_register;
2040
2041 /* carrier off reporting is important to ethtool even BEFORE open */
2042 netif_carrier_off(netdev);
2043
2044 #ifdef CONFIG_IGB_DCA
2045 if (dca_add_requester(&pdev->dev) == 0) {
2046 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2047 dev_info(&pdev->dev, "DCA enabled\n");
2048 igb_setup_dca(adapter);
2049 }
2050
2051 #endif
2052 /* do hw tstamp init after resetting */
2053 igb_init_hw_timer(adapter);
2054
2055 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2056 /* print bus type/speed/width info */
2057 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2058 netdev->name,
2059 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2060 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2061 "unknown"),
2062 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2063 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2064 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2065 "unknown"),
2066 netdev->dev_addr);
2067
2068 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2069 if (ret_val)
2070 strcpy(part_str, "Unknown");
2071 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2072 dev_info(&pdev->dev,
2073 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2074 adapter->msix_entries ? "MSI-X" :
2075 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2076 adapter->num_rx_queues, adapter->num_tx_queues);
2077 switch (hw->mac.type) {
2078 case e1000_i350:
2079 igb_set_eee_i350(hw);
2080 break;
2081 default:
2082 break;
2083 }
2084 return 0;
2085
2086 err_register:
2087 igb_release_hw_control(adapter);
2088 err_eeprom:
2089 if (!igb_check_reset_block(hw))
2090 igb_reset_phy(hw);
2091
2092 if (hw->flash_address)
2093 iounmap(hw->flash_address);
2094 err_sw_init:
2095 igb_clear_interrupt_scheme(adapter);
2096 iounmap(hw->hw_addr);
2097 err_ioremap:
2098 free_netdev(netdev);
2099 err_alloc_etherdev:
2100 pci_release_selected_regions(pdev,
2101 pci_select_bars(pdev, IORESOURCE_MEM));
2102 err_pci_reg:
2103 err_dma:
2104 pci_disable_device(pdev);
2105 return err;
2106 }
2107
2108 /**
2109 * igb_remove - Device Removal Routine
2110 * @pdev: PCI device information struct
2111 *
2112 * igb_remove is called by the PCI subsystem to alert the driver
2113 * that it should release a PCI device. The could be caused by a
2114 * Hot-Plug event, or because the driver is going to be removed from
2115 * memory.
2116 **/
2117 static void __devexit igb_remove(struct pci_dev *pdev)
2118 {
2119 struct net_device *netdev = pci_get_drvdata(pdev);
2120 struct igb_adapter *adapter = netdev_priv(netdev);
2121 struct e1000_hw *hw = &adapter->hw;
2122
2123 /*
2124 * The watchdog timer may be rescheduled, so explicitly
2125 * disable watchdog from being rescheduled.
2126 */
2127 set_bit(__IGB_DOWN, &adapter->state);
2128 del_timer_sync(&adapter->watchdog_timer);
2129 del_timer_sync(&adapter->phy_info_timer);
2130
2131 cancel_work_sync(&adapter->reset_task);
2132 cancel_work_sync(&adapter->watchdog_task);
2133
2134 #ifdef CONFIG_IGB_DCA
2135 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2136 dev_info(&pdev->dev, "DCA disabled\n");
2137 dca_remove_requester(&pdev->dev);
2138 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2139 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2140 }
2141 #endif
2142
2143 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2144 * would have already happened in close and is redundant. */
2145 igb_release_hw_control(adapter);
2146
2147 unregister_netdev(netdev);
2148
2149 igb_clear_interrupt_scheme(adapter);
2150
2151 #ifdef CONFIG_PCI_IOV
2152 /* reclaim resources allocated to VFs */
2153 if (adapter->vf_data) {
2154 /* disable iov and allow time for transactions to clear */
2155 pci_disable_sriov(pdev);
2156 msleep(500);
2157
2158 kfree(adapter->vf_data);
2159 adapter->vf_data = NULL;
2160 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2161 msleep(100);
2162 dev_info(&pdev->dev, "IOV Disabled\n");
2163 }
2164 #endif
2165
2166 iounmap(hw->hw_addr);
2167 if (hw->flash_address)
2168 iounmap(hw->flash_address);
2169 pci_release_selected_regions(pdev,
2170 pci_select_bars(pdev, IORESOURCE_MEM));
2171
2172 free_netdev(netdev);
2173
2174 pci_disable_pcie_error_reporting(pdev);
2175
2176 pci_disable_device(pdev);
2177 }
2178
2179 /**
2180 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2181 * @adapter: board private structure to initialize
2182 *
2183 * This function initializes the vf specific data storage and then attempts to
2184 * allocate the VFs. The reason for ordering it this way is because it is much
2185 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2186 * the memory for the VFs.
2187 **/
2188 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2189 {
2190 #ifdef CONFIG_PCI_IOV
2191 struct pci_dev *pdev = adapter->pdev;
2192
2193 if (adapter->vfs_allocated_count) {
2194 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2195 sizeof(struct vf_data_storage),
2196 GFP_KERNEL);
2197 /* if allocation failed then we do not support SR-IOV */
2198 if (!adapter->vf_data) {
2199 adapter->vfs_allocated_count = 0;
2200 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2201 "Data Storage\n");
2202 }
2203 }
2204
2205 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2206 kfree(adapter->vf_data);
2207 adapter->vf_data = NULL;
2208 #endif /* CONFIG_PCI_IOV */
2209 adapter->vfs_allocated_count = 0;
2210 #ifdef CONFIG_PCI_IOV
2211 } else {
2212 unsigned char mac_addr[ETH_ALEN];
2213 int i;
2214 dev_info(&pdev->dev, "%d vfs allocated\n",
2215 adapter->vfs_allocated_count);
2216 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2217 random_ether_addr(mac_addr);
2218 igb_set_vf_mac(adapter, i, mac_addr);
2219 }
2220 /* DMA Coalescing is not supported in IOV mode. */
2221 if (adapter->flags & IGB_FLAG_DMAC)
2222 adapter->flags &= ~IGB_FLAG_DMAC;
2223 }
2224 #endif /* CONFIG_PCI_IOV */
2225 }
2226
2227
2228 /**
2229 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2230 * @adapter: board private structure to initialize
2231 *
2232 * igb_init_hw_timer initializes the function pointer and values for the hw
2233 * timer found in hardware.
2234 **/
2235 static void igb_init_hw_timer(struct igb_adapter *adapter)
2236 {
2237 struct e1000_hw *hw = &adapter->hw;
2238
2239 switch (hw->mac.type) {
2240 case e1000_i350:
2241 case e1000_82580:
2242 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2243 adapter->cycles.read = igb_read_clock;
2244 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2245 adapter->cycles.mult = 1;
2246 /*
2247 * The 82580 timesync updates the system timer every 8ns by 8ns
2248 * and the value cannot be shifted. Instead we need to shift
2249 * the registers to generate a 64bit timer value. As a result
2250 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2251 * 24 in order to generate a larger value for synchronization.
2252 */
2253 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2254 /* disable system timer temporarily by setting bit 31 */
2255 wr32(E1000_TSAUXC, 0x80000000);
2256 wrfl();
2257
2258 /* Set registers so that rollover occurs soon to test this. */
2259 wr32(E1000_SYSTIMR, 0x00000000);
2260 wr32(E1000_SYSTIML, 0x80000000);
2261 wr32(E1000_SYSTIMH, 0x000000FF);
2262 wrfl();
2263
2264 /* enable system timer by clearing bit 31 */
2265 wr32(E1000_TSAUXC, 0x0);
2266 wrfl();
2267
2268 timecounter_init(&adapter->clock,
2269 &adapter->cycles,
2270 ktime_to_ns(ktime_get_real()));
2271 /*
2272 * Synchronize our NIC clock against system wall clock. NIC
2273 * time stamp reading requires ~3us per sample, each sample
2274 * was pretty stable even under load => only require 10
2275 * samples for each offset comparison.
2276 */
2277 memset(&adapter->compare, 0, sizeof(adapter->compare));
2278 adapter->compare.source = &adapter->clock;
2279 adapter->compare.target = ktime_get_real;
2280 adapter->compare.num_samples = 10;
2281 timecompare_update(&adapter->compare, 0);
2282 break;
2283 case e1000_82576:
2284 /*
2285 * Initialize hardware timer: we keep it running just in case
2286 * that some program needs it later on.
2287 */
2288 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2289 adapter->cycles.read = igb_read_clock;
2290 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2291 adapter->cycles.mult = 1;
2292 /**
2293 * Scale the NIC clock cycle by a large factor so that
2294 * relatively small clock corrections can be added or
2295 * subtracted at each clock tick. The drawbacks of a large
2296 * factor are a) that the clock register overflows more quickly
2297 * (not such a big deal) and b) that the increment per tick has
2298 * to fit into 24 bits. As a result we need to use a shift of
2299 * 19 so we can fit a value of 16 into the TIMINCA register.
2300 */
2301 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2302 wr32(E1000_TIMINCA,
2303 (1 << E1000_TIMINCA_16NS_SHIFT) |
2304 (16 << IGB_82576_TSYNC_SHIFT));
2305
2306 /* Set registers so that rollover occurs soon to test this. */
2307 wr32(E1000_SYSTIML, 0x00000000);
2308 wr32(E1000_SYSTIMH, 0xFF800000);
2309 wrfl();
2310
2311 timecounter_init(&adapter->clock,
2312 &adapter->cycles,
2313 ktime_to_ns(ktime_get_real()));
2314 /*
2315 * Synchronize our NIC clock against system wall clock. NIC
2316 * time stamp reading requires ~3us per sample, each sample
2317 * was pretty stable even under load => only require 10
2318 * samples for each offset comparison.
2319 */
2320 memset(&adapter->compare, 0, sizeof(adapter->compare));
2321 adapter->compare.source = &adapter->clock;
2322 adapter->compare.target = ktime_get_real;
2323 adapter->compare.num_samples = 10;
2324 timecompare_update(&adapter->compare, 0);
2325 break;
2326 case e1000_82575:
2327 /* 82575 does not support timesync */
2328 default:
2329 break;
2330 }
2331
2332 }
2333
2334 /**
2335 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2336 * @adapter: board private structure to initialize
2337 *
2338 * igb_sw_init initializes the Adapter private data structure.
2339 * Fields are initialized based on PCI device information and
2340 * OS network device settings (MTU size).
2341 **/
2342 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2343 {
2344 struct e1000_hw *hw = &adapter->hw;
2345 struct net_device *netdev = adapter->netdev;
2346 struct pci_dev *pdev = adapter->pdev;
2347
2348 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2349
2350 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2351 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2352 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2353 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2354
2355 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2356 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2357
2358 spin_lock_init(&adapter->stats64_lock);
2359 #ifdef CONFIG_PCI_IOV
2360 switch (hw->mac.type) {
2361 case e1000_82576:
2362 case e1000_i350:
2363 if (max_vfs > 7) {
2364 dev_warn(&pdev->dev,
2365 "Maximum of 7 VFs per PF, using max\n");
2366 adapter->vfs_allocated_count = 7;
2367 } else
2368 adapter->vfs_allocated_count = max_vfs;
2369 break;
2370 default:
2371 break;
2372 }
2373 #endif /* CONFIG_PCI_IOV */
2374 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2375
2376 /*
2377 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2378 * then we should combine the queues into a queue pair in order to
2379 * conserve interrupts due to limited supply
2380 */
2381 if ((adapter->rss_queues > 4) ||
2382 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2383 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2384
2385 /* This call may decrease the number of queues */
2386 if (igb_init_interrupt_scheme(adapter)) {
2387 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2388 return -ENOMEM;
2389 }
2390
2391 igb_probe_vfs(adapter);
2392
2393 /* Explicitly disable IRQ since the NIC can be in any state. */
2394 igb_irq_disable(adapter);
2395
2396 if (hw->mac.type == e1000_i350)
2397 adapter->flags &= ~IGB_FLAG_DMAC;
2398
2399 set_bit(__IGB_DOWN, &adapter->state);
2400 return 0;
2401 }
2402
2403 /**
2404 * igb_open - Called when a network interface is made active
2405 * @netdev: network interface device structure
2406 *
2407 * Returns 0 on success, negative value on failure
2408 *
2409 * The open entry point is called when a network interface is made
2410 * active by the system (IFF_UP). At this point all resources needed
2411 * for transmit and receive operations are allocated, the interrupt
2412 * handler is registered with the OS, the watchdog timer is started,
2413 * and the stack is notified that the interface is ready.
2414 **/
2415 static int igb_open(struct net_device *netdev)
2416 {
2417 struct igb_adapter *adapter = netdev_priv(netdev);
2418 struct e1000_hw *hw = &adapter->hw;
2419 int err;
2420 int i;
2421
2422 /* disallow open during test */
2423 if (test_bit(__IGB_TESTING, &adapter->state))
2424 return -EBUSY;
2425
2426 netif_carrier_off(netdev);
2427
2428 /* allocate transmit descriptors */
2429 err = igb_setup_all_tx_resources(adapter);
2430 if (err)
2431 goto err_setup_tx;
2432
2433 /* allocate receive descriptors */
2434 err = igb_setup_all_rx_resources(adapter);
2435 if (err)
2436 goto err_setup_rx;
2437
2438 igb_power_up_link(adapter);
2439
2440 /* before we allocate an interrupt, we must be ready to handle it.
2441 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2442 * as soon as we call pci_request_irq, so we have to setup our
2443 * clean_rx handler before we do so. */
2444 igb_configure(adapter);
2445
2446 err = igb_request_irq(adapter);
2447 if (err)
2448 goto err_req_irq;
2449
2450 /* From here on the code is the same as igb_up() */
2451 clear_bit(__IGB_DOWN, &adapter->state);
2452
2453 for (i = 0; i < adapter->num_q_vectors; i++) {
2454 struct igb_q_vector *q_vector = adapter->q_vector[i];
2455 napi_enable(&q_vector->napi);
2456 }
2457
2458 /* Clear any pending interrupts. */
2459 rd32(E1000_ICR);
2460
2461 igb_irq_enable(adapter);
2462
2463 /* notify VFs that reset has been completed */
2464 if (adapter->vfs_allocated_count) {
2465 u32 reg_data = rd32(E1000_CTRL_EXT);
2466 reg_data |= E1000_CTRL_EXT_PFRSTD;
2467 wr32(E1000_CTRL_EXT, reg_data);
2468 }
2469
2470 netif_tx_start_all_queues(netdev);
2471
2472 /* start the watchdog. */
2473 hw->mac.get_link_status = 1;
2474 schedule_work(&adapter->watchdog_task);
2475
2476 return 0;
2477
2478 err_req_irq:
2479 igb_release_hw_control(adapter);
2480 igb_power_down_link(adapter);
2481 igb_free_all_rx_resources(adapter);
2482 err_setup_rx:
2483 igb_free_all_tx_resources(adapter);
2484 err_setup_tx:
2485 igb_reset(adapter);
2486
2487 return err;
2488 }
2489
2490 /**
2491 * igb_close - Disables a network interface
2492 * @netdev: network interface device structure
2493 *
2494 * Returns 0, this is not allowed to fail
2495 *
2496 * The close entry point is called when an interface is de-activated
2497 * by the OS. The hardware is still under the driver's control, but
2498 * needs to be disabled. A global MAC reset is issued to stop the
2499 * hardware, and all transmit and receive resources are freed.
2500 **/
2501 static int igb_close(struct net_device *netdev)
2502 {
2503 struct igb_adapter *adapter = netdev_priv(netdev);
2504
2505 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2506 igb_down(adapter);
2507
2508 igb_free_irq(adapter);
2509
2510 igb_free_all_tx_resources(adapter);
2511 igb_free_all_rx_resources(adapter);
2512
2513 return 0;
2514 }
2515
2516 /**
2517 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2518 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2519 *
2520 * Return 0 on success, negative on failure
2521 **/
2522 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2523 {
2524 struct device *dev = tx_ring->dev;
2525 int size;
2526
2527 size = sizeof(struct igb_buffer) * tx_ring->count;
2528 tx_ring->buffer_info = vzalloc(size);
2529 if (!tx_ring->buffer_info)
2530 goto err;
2531
2532 /* round up to nearest 4K */
2533 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2534 tx_ring->size = ALIGN(tx_ring->size, 4096);
2535
2536 tx_ring->desc = dma_alloc_coherent(dev,
2537 tx_ring->size,
2538 &tx_ring->dma,
2539 GFP_KERNEL);
2540
2541 if (!tx_ring->desc)
2542 goto err;
2543
2544 tx_ring->next_to_use = 0;
2545 tx_ring->next_to_clean = 0;
2546 return 0;
2547
2548 err:
2549 vfree(tx_ring->buffer_info);
2550 dev_err(dev,
2551 "Unable to allocate memory for the transmit descriptor ring\n");
2552 return -ENOMEM;
2553 }
2554
2555 /**
2556 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2557 * (Descriptors) for all queues
2558 * @adapter: board private structure
2559 *
2560 * Return 0 on success, negative on failure
2561 **/
2562 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2563 {
2564 struct pci_dev *pdev = adapter->pdev;
2565 int i, err = 0;
2566
2567 for (i = 0; i < adapter->num_tx_queues; i++) {
2568 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2569 if (err) {
2570 dev_err(&pdev->dev,
2571 "Allocation for Tx Queue %u failed\n", i);
2572 for (i--; i >= 0; i--)
2573 igb_free_tx_resources(adapter->tx_ring[i]);
2574 break;
2575 }
2576 }
2577
2578 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2579 int r_idx = i % adapter->num_tx_queues;
2580 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2581 }
2582 return err;
2583 }
2584
2585 /**
2586 * igb_setup_tctl - configure the transmit control registers
2587 * @adapter: Board private structure
2588 **/
2589 void igb_setup_tctl(struct igb_adapter *adapter)
2590 {
2591 struct e1000_hw *hw = &adapter->hw;
2592 u32 tctl;
2593
2594 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2595 wr32(E1000_TXDCTL(0), 0);
2596
2597 /* Program the Transmit Control Register */
2598 tctl = rd32(E1000_TCTL);
2599 tctl &= ~E1000_TCTL_CT;
2600 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2601 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2602
2603 igb_config_collision_dist(hw);
2604
2605 /* Enable transmits */
2606 tctl |= E1000_TCTL_EN;
2607
2608 wr32(E1000_TCTL, tctl);
2609 }
2610
2611 /**
2612 * igb_configure_tx_ring - Configure transmit ring after Reset
2613 * @adapter: board private structure
2614 * @ring: tx ring to configure
2615 *
2616 * Configure a transmit ring after a reset.
2617 **/
2618 void igb_configure_tx_ring(struct igb_adapter *adapter,
2619 struct igb_ring *ring)
2620 {
2621 struct e1000_hw *hw = &adapter->hw;
2622 u32 txdctl;
2623 u64 tdba = ring->dma;
2624 int reg_idx = ring->reg_idx;
2625
2626 /* disable the queue */
2627 txdctl = rd32(E1000_TXDCTL(reg_idx));
2628 wr32(E1000_TXDCTL(reg_idx),
2629 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2630 wrfl();
2631 mdelay(10);
2632
2633 wr32(E1000_TDLEN(reg_idx),
2634 ring->count * sizeof(union e1000_adv_tx_desc));
2635 wr32(E1000_TDBAL(reg_idx),
2636 tdba & 0x00000000ffffffffULL);
2637 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2638
2639 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2640 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2641 writel(0, ring->head);
2642 writel(0, ring->tail);
2643
2644 txdctl |= IGB_TX_PTHRESH;
2645 txdctl |= IGB_TX_HTHRESH << 8;
2646 txdctl |= IGB_TX_WTHRESH << 16;
2647
2648 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2649 wr32(E1000_TXDCTL(reg_idx), txdctl);
2650 }
2651
2652 /**
2653 * igb_configure_tx - Configure transmit Unit after Reset
2654 * @adapter: board private structure
2655 *
2656 * Configure the Tx unit of the MAC after a reset.
2657 **/
2658 static void igb_configure_tx(struct igb_adapter *adapter)
2659 {
2660 int i;
2661
2662 for (i = 0; i < adapter->num_tx_queues; i++)
2663 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2664 }
2665
2666 /**
2667 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2668 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2669 *
2670 * Returns 0 on success, negative on failure
2671 **/
2672 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2673 {
2674 struct device *dev = rx_ring->dev;
2675 int size, desc_len;
2676
2677 size = sizeof(struct igb_buffer) * rx_ring->count;
2678 rx_ring->buffer_info = vzalloc(size);
2679 if (!rx_ring->buffer_info)
2680 goto err;
2681
2682 desc_len = sizeof(union e1000_adv_rx_desc);
2683
2684 /* Round up to nearest 4K */
2685 rx_ring->size = rx_ring->count * desc_len;
2686 rx_ring->size = ALIGN(rx_ring->size, 4096);
2687
2688 rx_ring->desc = dma_alloc_coherent(dev,
2689 rx_ring->size,
2690 &rx_ring->dma,
2691 GFP_KERNEL);
2692
2693 if (!rx_ring->desc)
2694 goto err;
2695
2696 rx_ring->next_to_clean = 0;
2697 rx_ring->next_to_use = 0;
2698
2699 return 0;
2700
2701 err:
2702 vfree(rx_ring->buffer_info);
2703 rx_ring->buffer_info = NULL;
2704 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2705 " ring\n");
2706 return -ENOMEM;
2707 }
2708
2709 /**
2710 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2711 * (Descriptors) for all queues
2712 * @adapter: board private structure
2713 *
2714 * Return 0 on success, negative on failure
2715 **/
2716 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2717 {
2718 struct pci_dev *pdev = adapter->pdev;
2719 int i, err = 0;
2720
2721 for (i = 0; i < adapter->num_rx_queues; i++) {
2722 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2723 if (err) {
2724 dev_err(&pdev->dev,
2725 "Allocation for Rx Queue %u failed\n", i);
2726 for (i--; i >= 0; i--)
2727 igb_free_rx_resources(adapter->rx_ring[i]);
2728 break;
2729 }
2730 }
2731
2732 return err;
2733 }
2734
2735 /**
2736 * igb_setup_mrqc - configure the multiple receive queue control registers
2737 * @adapter: Board private structure
2738 **/
2739 static void igb_setup_mrqc(struct igb_adapter *adapter)
2740 {
2741 struct e1000_hw *hw = &adapter->hw;
2742 u32 mrqc, rxcsum;
2743 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2744 union e1000_reta {
2745 u32 dword;
2746 u8 bytes[4];
2747 } reta;
2748 static const u8 rsshash[40] = {
2749 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2750 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2751 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2752 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2753
2754 /* Fill out hash function seeds */
2755 for (j = 0; j < 10; j++) {
2756 u32 rsskey = rsshash[(j * 4)];
2757 rsskey |= rsshash[(j * 4) + 1] << 8;
2758 rsskey |= rsshash[(j * 4) + 2] << 16;
2759 rsskey |= rsshash[(j * 4) + 3] << 24;
2760 array_wr32(E1000_RSSRK(0), j, rsskey);
2761 }
2762
2763 num_rx_queues = adapter->rss_queues;
2764
2765 if (adapter->vfs_allocated_count) {
2766 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2767 switch (hw->mac.type) {
2768 case e1000_i350:
2769 case e1000_82580:
2770 num_rx_queues = 1;
2771 shift = 0;
2772 break;
2773 case e1000_82576:
2774 shift = 3;
2775 num_rx_queues = 2;
2776 break;
2777 case e1000_82575:
2778 shift = 2;
2779 shift2 = 6;
2780 default:
2781 break;
2782 }
2783 } else {
2784 if (hw->mac.type == e1000_82575)
2785 shift = 6;
2786 }
2787
2788 for (j = 0; j < (32 * 4); j++) {
2789 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2790 if (shift2)
2791 reta.bytes[j & 3] |= num_rx_queues << shift2;
2792 if ((j & 3) == 3)
2793 wr32(E1000_RETA(j >> 2), reta.dword);
2794 }
2795
2796 /*
2797 * Disable raw packet checksumming so that RSS hash is placed in
2798 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2799 * offloads as they are enabled by default
2800 */
2801 rxcsum = rd32(E1000_RXCSUM);
2802 rxcsum |= E1000_RXCSUM_PCSD;
2803
2804 if (adapter->hw.mac.type >= e1000_82576)
2805 /* Enable Receive Checksum Offload for SCTP */
2806 rxcsum |= E1000_RXCSUM_CRCOFL;
2807
2808 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2809 wr32(E1000_RXCSUM, rxcsum);
2810
2811 /* If VMDq is enabled then we set the appropriate mode for that, else
2812 * we default to RSS so that an RSS hash is calculated per packet even
2813 * if we are only using one queue */
2814 if (adapter->vfs_allocated_count) {
2815 if (hw->mac.type > e1000_82575) {
2816 /* Set the default pool for the PF's first queue */
2817 u32 vtctl = rd32(E1000_VT_CTL);
2818 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2819 E1000_VT_CTL_DISABLE_DEF_POOL);
2820 vtctl |= adapter->vfs_allocated_count <<
2821 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2822 wr32(E1000_VT_CTL, vtctl);
2823 }
2824 if (adapter->rss_queues > 1)
2825 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2826 else
2827 mrqc = E1000_MRQC_ENABLE_VMDQ;
2828 } else {
2829 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2830 }
2831 igb_vmm_control(adapter);
2832
2833 /*
2834 * Generate RSS hash based on TCP port numbers and/or
2835 * IPv4/v6 src and dst addresses since UDP cannot be
2836 * hashed reliably due to IP fragmentation
2837 */
2838 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2839 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2840 E1000_MRQC_RSS_FIELD_IPV6 |
2841 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2842 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2843
2844 wr32(E1000_MRQC, mrqc);
2845 }
2846
2847 /**
2848 * igb_setup_rctl - configure the receive control registers
2849 * @adapter: Board private structure
2850 **/
2851 void igb_setup_rctl(struct igb_adapter *adapter)
2852 {
2853 struct e1000_hw *hw = &adapter->hw;
2854 u32 rctl;
2855
2856 rctl = rd32(E1000_RCTL);
2857
2858 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2859 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2860
2861 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2862 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2863
2864 /*
2865 * enable stripping of CRC. It's unlikely this will break BMC
2866 * redirection as it did with e1000. Newer features require
2867 * that the HW strips the CRC.
2868 */
2869 rctl |= E1000_RCTL_SECRC;
2870
2871 /* disable store bad packets and clear size bits. */
2872 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2873
2874 /* enable LPE to prevent packets larger than max_frame_size */
2875 rctl |= E1000_RCTL_LPE;
2876
2877 /* disable queue 0 to prevent tail write w/o re-config */
2878 wr32(E1000_RXDCTL(0), 0);
2879
2880 /* Attention!!! For SR-IOV PF driver operations you must enable
2881 * queue drop for all VF and PF queues to prevent head of line blocking
2882 * if an un-trusted VF does not provide descriptors to hardware.
2883 */
2884 if (adapter->vfs_allocated_count) {
2885 /* set all queue drop enable bits */
2886 wr32(E1000_QDE, ALL_QUEUES);
2887 }
2888
2889 wr32(E1000_RCTL, rctl);
2890 }
2891
2892 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2893 int vfn)
2894 {
2895 struct e1000_hw *hw = &adapter->hw;
2896 u32 vmolr;
2897
2898 /* if it isn't the PF check to see if VFs are enabled and
2899 * increase the size to support vlan tags */
2900 if (vfn < adapter->vfs_allocated_count &&
2901 adapter->vf_data[vfn].vlans_enabled)
2902 size += VLAN_TAG_SIZE;
2903
2904 vmolr = rd32(E1000_VMOLR(vfn));
2905 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2906 vmolr |= size | E1000_VMOLR_LPE;
2907 wr32(E1000_VMOLR(vfn), vmolr);
2908
2909 return 0;
2910 }
2911
2912 /**
2913 * igb_rlpml_set - set maximum receive packet size
2914 * @adapter: board private structure
2915 *
2916 * Configure maximum receivable packet size.
2917 **/
2918 static void igb_rlpml_set(struct igb_adapter *adapter)
2919 {
2920 u32 max_frame_size = adapter->max_frame_size;
2921 struct e1000_hw *hw = &adapter->hw;
2922 u16 pf_id = adapter->vfs_allocated_count;
2923
2924 if (adapter->vlgrp)
2925 max_frame_size += VLAN_TAG_SIZE;
2926
2927 /* if vfs are enabled we set RLPML to the largest possible request
2928 * size and set the VMOLR RLPML to the size we need */
2929 if (pf_id) {
2930 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2931 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2932 }
2933
2934 wr32(E1000_RLPML, max_frame_size);
2935 }
2936
2937 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2938 int vfn, bool aupe)
2939 {
2940 struct e1000_hw *hw = &adapter->hw;
2941 u32 vmolr;
2942
2943 /*
2944 * This register exists only on 82576 and newer so if we are older then
2945 * we should exit and do nothing
2946 */
2947 if (hw->mac.type < e1000_82576)
2948 return;
2949
2950 vmolr = rd32(E1000_VMOLR(vfn));
2951 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2952 if (aupe)
2953 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2954 else
2955 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2956
2957 /* clear all bits that might not be set */
2958 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2959
2960 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2961 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2962 /*
2963 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2964 * multicast packets
2965 */
2966 if (vfn <= adapter->vfs_allocated_count)
2967 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2968
2969 wr32(E1000_VMOLR(vfn), vmolr);
2970 }
2971
2972 /**
2973 * igb_configure_rx_ring - Configure a receive ring after Reset
2974 * @adapter: board private structure
2975 * @ring: receive ring to be configured
2976 *
2977 * Configure the Rx unit of the MAC after a reset.
2978 **/
2979 void igb_configure_rx_ring(struct igb_adapter *adapter,
2980 struct igb_ring *ring)
2981 {
2982 struct e1000_hw *hw = &adapter->hw;
2983 u64 rdba = ring->dma;
2984 int reg_idx = ring->reg_idx;
2985 u32 srrctl, rxdctl;
2986
2987 /* disable the queue */
2988 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2989 wr32(E1000_RXDCTL(reg_idx),
2990 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2991
2992 /* Set DMA base address registers */
2993 wr32(E1000_RDBAL(reg_idx),
2994 rdba & 0x00000000ffffffffULL);
2995 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2996 wr32(E1000_RDLEN(reg_idx),
2997 ring->count * sizeof(union e1000_adv_rx_desc));
2998
2999 /* initialize head and tail */
3000 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3001 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3002 writel(0, ring->head);
3003 writel(0, ring->tail);
3004
3005 /* set descriptor configuration */
3006 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3007 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3008 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3009 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3010 srrctl |= IGB_RXBUFFER_16384 >>
3011 E1000_SRRCTL_BSIZEPKT_SHIFT;
3012 #else
3013 srrctl |= (PAGE_SIZE / 2) >>
3014 E1000_SRRCTL_BSIZEPKT_SHIFT;
3015 #endif
3016 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3017 } else {
3018 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3019 E1000_SRRCTL_BSIZEPKT_SHIFT;
3020 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3021 }
3022 if (hw->mac.type == e1000_82580)
3023 srrctl |= E1000_SRRCTL_TIMESTAMP;
3024 /* Only set Drop Enable if we are supporting multiple queues */
3025 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3026 srrctl |= E1000_SRRCTL_DROP_EN;
3027
3028 wr32(E1000_SRRCTL(reg_idx), srrctl);
3029
3030 /* set filtering for VMDQ pools */
3031 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3032
3033 /* enable receive descriptor fetching */
3034 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3035 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3036 rxdctl &= 0xFFF00000;
3037 rxdctl |= IGB_RX_PTHRESH;
3038 rxdctl |= IGB_RX_HTHRESH << 8;
3039 rxdctl |= IGB_RX_WTHRESH << 16;
3040 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3041 }
3042
3043 /**
3044 * igb_configure_rx - Configure receive Unit after Reset
3045 * @adapter: board private structure
3046 *
3047 * Configure the Rx unit of the MAC after a reset.
3048 **/
3049 static void igb_configure_rx(struct igb_adapter *adapter)
3050 {
3051 int i;
3052
3053 /* set UTA to appropriate mode */
3054 igb_set_uta(adapter);
3055
3056 /* set the correct pool for the PF default MAC address in entry 0 */
3057 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3058 adapter->vfs_allocated_count);
3059
3060 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3061 * the Base and Length of the Rx Descriptor Ring */
3062 for (i = 0; i < adapter->num_rx_queues; i++)
3063 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3064 }
3065
3066 /**
3067 * igb_free_tx_resources - Free Tx Resources per Queue
3068 * @tx_ring: Tx descriptor ring for a specific queue
3069 *
3070 * Free all transmit software resources
3071 **/
3072 void igb_free_tx_resources(struct igb_ring *tx_ring)
3073 {
3074 igb_clean_tx_ring(tx_ring);
3075
3076 vfree(tx_ring->buffer_info);
3077 tx_ring->buffer_info = NULL;
3078
3079 /* if not set, then don't free */
3080 if (!tx_ring->desc)
3081 return;
3082
3083 dma_free_coherent(tx_ring->dev, tx_ring->size,
3084 tx_ring->desc, tx_ring->dma);
3085
3086 tx_ring->desc = NULL;
3087 }
3088
3089 /**
3090 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3091 * @adapter: board private structure
3092 *
3093 * Free all transmit software resources
3094 **/
3095 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3096 {
3097 int i;
3098
3099 for (i = 0; i < adapter->num_tx_queues; i++)
3100 igb_free_tx_resources(adapter->tx_ring[i]);
3101 }
3102
3103 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3104 struct igb_buffer *buffer_info)
3105 {
3106 if (buffer_info->dma) {
3107 if (buffer_info->mapped_as_page)
3108 dma_unmap_page(tx_ring->dev,
3109 buffer_info->dma,
3110 buffer_info->length,
3111 DMA_TO_DEVICE);
3112 else
3113 dma_unmap_single(tx_ring->dev,
3114 buffer_info->dma,
3115 buffer_info->length,
3116 DMA_TO_DEVICE);
3117 buffer_info->dma = 0;
3118 }
3119 if (buffer_info->skb) {
3120 dev_kfree_skb_any(buffer_info->skb);
3121 buffer_info->skb = NULL;
3122 }
3123 buffer_info->time_stamp = 0;
3124 buffer_info->length = 0;
3125 buffer_info->next_to_watch = 0;
3126 buffer_info->mapped_as_page = false;
3127 }
3128
3129 /**
3130 * igb_clean_tx_ring - Free Tx Buffers
3131 * @tx_ring: ring to be cleaned
3132 **/
3133 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3134 {
3135 struct igb_buffer *buffer_info;
3136 unsigned long size;
3137 unsigned int i;
3138
3139 if (!tx_ring->buffer_info)
3140 return;
3141 /* Free all the Tx ring sk_buffs */
3142
3143 for (i = 0; i < tx_ring->count; i++) {
3144 buffer_info = &tx_ring->buffer_info[i];
3145 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3146 }
3147
3148 size = sizeof(struct igb_buffer) * tx_ring->count;
3149 memset(tx_ring->buffer_info, 0, size);
3150
3151 /* Zero out the descriptor ring */
3152 memset(tx_ring->desc, 0, tx_ring->size);
3153
3154 tx_ring->next_to_use = 0;
3155 tx_ring->next_to_clean = 0;
3156 }
3157
3158 /**
3159 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3160 * @adapter: board private structure
3161 **/
3162 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3163 {
3164 int i;
3165
3166 for (i = 0; i < adapter->num_tx_queues; i++)
3167 igb_clean_tx_ring(adapter->tx_ring[i]);
3168 }
3169
3170 /**
3171 * igb_free_rx_resources - Free Rx Resources
3172 * @rx_ring: ring to clean the resources from
3173 *
3174 * Free all receive software resources
3175 **/
3176 void igb_free_rx_resources(struct igb_ring *rx_ring)
3177 {
3178 igb_clean_rx_ring(rx_ring);
3179
3180 vfree(rx_ring->buffer_info);
3181 rx_ring->buffer_info = NULL;
3182
3183 /* if not set, then don't free */
3184 if (!rx_ring->desc)
3185 return;
3186
3187 dma_free_coherent(rx_ring->dev, rx_ring->size,
3188 rx_ring->desc, rx_ring->dma);
3189
3190 rx_ring->desc = NULL;
3191 }
3192
3193 /**
3194 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3195 * @adapter: board private structure
3196 *
3197 * Free all receive software resources
3198 **/
3199 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3200 {
3201 int i;
3202
3203 for (i = 0; i < adapter->num_rx_queues; i++)
3204 igb_free_rx_resources(adapter->rx_ring[i]);
3205 }
3206
3207 /**
3208 * igb_clean_rx_ring - Free Rx Buffers per Queue
3209 * @rx_ring: ring to free buffers from
3210 **/
3211 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3212 {
3213 struct igb_buffer *buffer_info;
3214 unsigned long size;
3215 unsigned int i;
3216
3217 if (!rx_ring->buffer_info)
3218 return;
3219
3220 /* Free all the Rx ring sk_buffs */
3221 for (i = 0; i < rx_ring->count; i++) {
3222 buffer_info = &rx_ring->buffer_info[i];
3223 if (buffer_info->dma) {
3224 dma_unmap_single(rx_ring->dev,
3225 buffer_info->dma,
3226 rx_ring->rx_buffer_len,
3227 DMA_FROM_DEVICE);
3228 buffer_info->dma = 0;
3229 }
3230
3231 if (buffer_info->skb) {
3232 dev_kfree_skb(buffer_info->skb);
3233 buffer_info->skb = NULL;
3234 }
3235 if (buffer_info->page_dma) {
3236 dma_unmap_page(rx_ring->dev,
3237 buffer_info->page_dma,
3238 PAGE_SIZE / 2,
3239 DMA_FROM_DEVICE);
3240 buffer_info->page_dma = 0;
3241 }
3242 if (buffer_info->page) {
3243 put_page(buffer_info->page);
3244 buffer_info->page = NULL;
3245 buffer_info->page_offset = 0;
3246 }
3247 }
3248
3249 size = sizeof(struct igb_buffer) * rx_ring->count;
3250 memset(rx_ring->buffer_info, 0, size);
3251
3252 /* Zero out the descriptor ring */
3253 memset(rx_ring->desc, 0, rx_ring->size);
3254
3255 rx_ring->next_to_clean = 0;
3256 rx_ring->next_to_use = 0;
3257 }
3258
3259 /**
3260 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3261 * @adapter: board private structure
3262 **/
3263 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3264 {
3265 int i;
3266
3267 for (i = 0; i < adapter->num_rx_queues; i++)
3268 igb_clean_rx_ring(adapter->rx_ring[i]);
3269 }
3270
3271 /**
3272 * igb_set_mac - Change the Ethernet Address of the NIC
3273 * @netdev: network interface device structure
3274 * @p: pointer to an address structure
3275 *
3276 * Returns 0 on success, negative on failure
3277 **/
3278 static int igb_set_mac(struct net_device *netdev, void *p)
3279 {
3280 struct igb_adapter *adapter = netdev_priv(netdev);
3281 struct e1000_hw *hw = &adapter->hw;
3282 struct sockaddr *addr = p;
3283
3284 if (!is_valid_ether_addr(addr->sa_data))
3285 return -EADDRNOTAVAIL;
3286
3287 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3288 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3289
3290 /* set the correct pool for the new PF MAC address in entry 0 */
3291 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3292 adapter->vfs_allocated_count);
3293
3294 return 0;
3295 }
3296
3297 /**
3298 * igb_write_mc_addr_list - write multicast addresses to MTA
3299 * @netdev: network interface device structure
3300 *
3301 * Writes multicast address list to the MTA hash table.
3302 * Returns: -ENOMEM on failure
3303 * 0 on no addresses written
3304 * X on writing X addresses to MTA
3305 **/
3306 static int igb_write_mc_addr_list(struct net_device *netdev)
3307 {
3308 struct igb_adapter *adapter = netdev_priv(netdev);
3309 struct e1000_hw *hw = &adapter->hw;
3310 struct netdev_hw_addr *ha;
3311 u8 *mta_list;
3312 int i;
3313
3314 if (netdev_mc_empty(netdev)) {
3315 /* nothing to program, so clear mc list */
3316 igb_update_mc_addr_list(hw, NULL, 0);
3317 igb_restore_vf_multicasts(adapter);
3318 return 0;
3319 }
3320
3321 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3322 if (!mta_list)
3323 return -ENOMEM;
3324
3325 /* The shared function expects a packed array of only addresses. */
3326 i = 0;
3327 netdev_for_each_mc_addr(ha, netdev)
3328 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3329
3330 igb_update_mc_addr_list(hw, mta_list, i);
3331 kfree(mta_list);
3332
3333 return netdev_mc_count(netdev);
3334 }
3335
3336 /**
3337 * igb_write_uc_addr_list - write unicast addresses to RAR table
3338 * @netdev: network interface device structure
3339 *
3340 * Writes unicast address list to the RAR table.
3341 * Returns: -ENOMEM on failure/insufficient address space
3342 * 0 on no addresses written
3343 * X on writing X addresses to the RAR table
3344 **/
3345 static int igb_write_uc_addr_list(struct net_device *netdev)
3346 {
3347 struct igb_adapter *adapter = netdev_priv(netdev);
3348 struct e1000_hw *hw = &adapter->hw;
3349 unsigned int vfn = adapter->vfs_allocated_count;
3350 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3351 int count = 0;
3352
3353 /* return ENOMEM indicating insufficient memory for addresses */
3354 if (netdev_uc_count(netdev) > rar_entries)
3355 return -ENOMEM;
3356
3357 if (!netdev_uc_empty(netdev) && rar_entries) {
3358 struct netdev_hw_addr *ha;
3359
3360 netdev_for_each_uc_addr(ha, netdev) {
3361 if (!rar_entries)
3362 break;
3363 igb_rar_set_qsel(adapter, ha->addr,
3364 rar_entries--,
3365 vfn);
3366 count++;
3367 }
3368 }
3369 /* write the addresses in reverse order to avoid write combining */
3370 for (; rar_entries > 0 ; rar_entries--) {
3371 wr32(E1000_RAH(rar_entries), 0);
3372 wr32(E1000_RAL(rar_entries), 0);
3373 }
3374 wrfl();
3375
3376 return count;
3377 }
3378
3379 /**
3380 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3381 * @netdev: network interface device structure
3382 *
3383 * The set_rx_mode entry point is called whenever the unicast or multicast
3384 * address lists or the network interface flags are updated. This routine is
3385 * responsible for configuring the hardware for proper unicast, multicast,
3386 * promiscuous mode, and all-multi behavior.
3387 **/
3388 static void igb_set_rx_mode(struct net_device *netdev)
3389 {
3390 struct igb_adapter *adapter = netdev_priv(netdev);
3391 struct e1000_hw *hw = &adapter->hw;
3392 unsigned int vfn = adapter->vfs_allocated_count;
3393 u32 rctl, vmolr = 0;
3394 int count;
3395
3396 /* Check for Promiscuous and All Multicast modes */
3397 rctl = rd32(E1000_RCTL);
3398
3399 /* clear the effected bits */
3400 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3401
3402 if (netdev->flags & IFF_PROMISC) {
3403 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3404 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3405 } else {
3406 if (netdev->flags & IFF_ALLMULTI) {
3407 rctl |= E1000_RCTL_MPE;
3408 vmolr |= E1000_VMOLR_MPME;
3409 } else {
3410 /*
3411 * Write addresses to the MTA, if the attempt fails
3412 * then we should just turn on promiscuous mode so
3413 * that we can at least receive multicast traffic
3414 */
3415 count = igb_write_mc_addr_list(netdev);
3416 if (count < 0) {
3417 rctl |= E1000_RCTL_MPE;
3418 vmolr |= E1000_VMOLR_MPME;
3419 } else if (count) {
3420 vmolr |= E1000_VMOLR_ROMPE;
3421 }
3422 }
3423 /*
3424 * Write addresses to available RAR registers, if there is not
3425 * sufficient space to store all the addresses then enable
3426 * unicast promiscuous mode
3427 */
3428 count = igb_write_uc_addr_list(netdev);
3429 if (count < 0) {
3430 rctl |= E1000_RCTL_UPE;
3431 vmolr |= E1000_VMOLR_ROPE;
3432 }
3433 rctl |= E1000_RCTL_VFE;
3434 }
3435 wr32(E1000_RCTL, rctl);
3436
3437 /*
3438 * In order to support SR-IOV and eventually VMDq it is necessary to set
3439 * the VMOLR to enable the appropriate modes. Without this workaround
3440 * we will have issues with VLAN tag stripping not being done for frames
3441 * that are only arriving because we are the default pool
3442 */
3443 if (hw->mac.type < e1000_82576)
3444 return;
3445
3446 vmolr |= rd32(E1000_VMOLR(vfn)) &
3447 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3448 wr32(E1000_VMOLR(vfn), vmolr);
3449 igb_restore_vf_multicasts(adapter);
3450 }
3451
3452 static void igb_check_wvbr(struct igb_adapter *adapter)
3453 {
3454 struct e1000_hw *hw = &adapter->hw;
3455 u32 wvbr = 0;
3456
3457 switch (hw->mac.type) {
3458 case e1000_82576:
3459 case e1000_i350:
3460 if (!(wvbr = rd32(E1000_WVBR)))
3461 return;
3462 break;
3463 default:
3464 break;
3465 }
3466
3467 adapter->wvbr |= wvbr;
3468 }
3469
3470 #define IGB_STAGGERED_QUEUE_OFFSET 8
3471
3472 static void igb_spoof_check(struct igb_adapter *adapter)
3473 {
3474 int j;
3475
3476 if (!adapter->wvbr)
3477 return;
3478
3479 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3480 if (adapter->wvbr & (1 << j) ||
3481 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3482 dev_warn(&adapter->pdev->dev,
3483 "Spoof event(s) detected on VF %d\n", j);
3484 adapter->wvbr &=
3485 ~((1 << j) |
3486 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3487 }
3488 }
3489 }
3490
3491 /* Need to wait a few seconds after link up to get diagnostic information from
3492 * the phy */
3493 static void igb_update_phy_info(unsigned long data)
3494 {
3495 struct igb_adapter *adapter = (struct igb_adapter *) data;
3496 igb_get_phy_info(&adapter->hw);
3497 }
3498
3499 /**
3500 * igb_has_link - check shared code for link and determine up/down
3501 * @adapter: pointer to driver private info
3502 **/
3503 bool igb_has_link(struct igb_adapter *adapter)
3504 {
3505 struct e1000_hw *hw = &adapter->hw;
3506 bool link_active = false;
3507 s32 ret_val = 0;
3508
3509 /* get_link_status is set on LSC (link status) interrupt or
3510 * rx sequence error interrupt. get_link_status will stay
3511 * false until the e1000_check_for_link establishes link
3512 * for copper adapters ONLY
3513 */
3514 switch (hw->phy.media_type) {
3515 case e1000_media_type_copper:
3516 if (hw->mac.get_link_status) {
3517 ret_val = hw->mac.ops.check_for_link(hw);
3518 link_active = !hw->mac.get_link_status;
3519 } else {
3520 link_active = true;
3521 }
3522 break;
3523 case e1000_media_type_internal_serdes:
3524 ret_val = hw->mac.ops.check_for_link(hw);
3525 link_active = hw->mac.serdes_has_link;
3526 break;
3527 default:
3528 case e1000_media_type_unknown:
3529 break;
3530 }
3531
3532 return link_active;
3533 }
3534
3535 /**
3536 * igb_watchdog - Timer Call-back
3537 * @data: pointer to adapter cast into an unsigned long
3538 **/
3539 static void igb_watchdog(unsigned long data)
3540 {
3541 struct igb_adapter *adapter = (struct igb_adapter *)data;
3542 /* Do the rest outside of interrupt context */
3543 schedule_work(&adapter->watchdog_task);
3544 }
3545
3546 static void igb_watchdog_task(struct work_struct *work)
3547 {
3548 struct igb_adapter *adapter = container_of(work,
3549 struct igb_adapter,
3550 watchdog_task);
3551 struct e1000_hw *hw = &adapter->hw;
3552 struct net_device *netdev = adapter->netdev;
3553 u32 link, ctrl_ext, thstat;
3554 int i;
3555
3556 link = igb_has_link(adapter);
3557 if (link) {
3558 if (!netif_carrier_ok(netdev)) {
3559 u32 ctrl;
3560 hw->mac.ops.get_speed_and_duplex(hw,
3561 &adapter->link_speed,
3562 &adapter->link_duplex);
3563
3564 ctrl = rd32(E1000_CTRL);
3565 /* Links status message must follow this format */
3566 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3567 "Flow Control: %s\n",
3568 netdev->name,
3569 adapter->link_speed,
3570 adapter->link_duplex == FULL_DUPLEX ?
3571 "Full Duplex" : "Half Duplex",
3572 ((ctrl & E1000_CTRL_TFCE) &&
3573 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3574 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3575 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3576
3577 /* check for thermal sensor event on i350,
3578 * copper only */
3579 if (hw->mac.type == e1000_i350) {
3580 thstat = rd32(E1000_THSTAT);
3581 ctrl_ext = rd32(E1000_CTRL_EXT);
3582 if ((hw->phy.media_type ==
3583 e1000_media_type_copper) && !(ctrl_ext &
3584 E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3585 if (thstat &
3586 E1000_THSTAT_LINK_THROTTLE) {
3587 printk(KERN_INFO "igb: %s The "
3588 "network adapter link "
3589 "speed was downshifted "
3590 "because it "
3591 "overheated.\n",
3592 netdev->name);
3593 }
3594 }
3595 }
3596 /* adjust timeout factor according to speed/duplex */
3597 adapter->tx_timeout_factor = 1;
3598 switch (adapter->link_speed) {
3599 case SPEED_10:
3600 adapter->tx_timeout_factor = 14;
3601 break;
3602 case SPEED_100:
3603 /* maybe add some timeout factor ? */
3604 break;
3605 }
3606
3607 netif_carrier_on(netdev);
3608
3609 igb_ping_all_vfs(adapter);
3610 igb_check_vf_rate_limit(adapter);
3611
3612 /* link state has changed, schedule phy info update */
3613 if (!test_bit(__IGB_DOWN, &adapter->state))
3614 mod_timer(&adapter->phy_info_timer,
3615 round_jiffies(jiffies + 2 * HZ));
3616 }
3617 } else {
3618 if (netif_carrier_ok(netdev)) {
3619 adapter->link_speed = 0;
3620 adapter->link_duplex = 0;
3621 /* check for thermal sensor event on i350
3622 * copper only*/
3623 if (hw->mac.type == e1000_i350) {
3624 thstat = rd32(E1000_THSTAT);
3625 ctrl_ext = rd32(E1000_CTRL_EXT);
3626 if ((hw->phy.media_type ==
3627 e1000_media_type_copper) && !(ctrl_ext &
3628 E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3629 if (thstat & E1000_THSTAT_PWR_DOWN) {
3630 printk(KERN_ERR "igb: %s The "
3631 "network adapter was stopped "
3632 "because it overheated.\n",
3633 netdev->name);
3634 }
3635 }
3636 }
3637 /* Links status message must follow this format */
3638 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3639 netdev->name);
3640 netif_carrier_off(netdev);
3641
3642 igb_ping_all_vfs(adapter);
3643
3644 /* link state has changed, schedule phy info update */
3645 if (!test_bit(__IGB_DOWN, &adapter->state))
3646 mod_timer(&adapter->phy_info_timer,
3647 round_jiffies(jiffies + 2 * HZ));
3648 }
3649 }
3650
3651 spin_lock(&adapter->stats64_lock);
3652 igb_update_stats(adapter, &adapter->stats64);
3653 spin_unlock(&adapter->stats64_lock);
3654
3655 for (i = 0; i < adapter->num_tx_queues; i++) {
3656 struct igb_ring *tx_ring = adapter->tx_ring[i];
3657 if (!netif_carrier_ok(netdev)) {
3658 /* We've lost link, so the controller stops DMA,
3659 * but we've got queued Tx work that's never going
3660 * to get done, so reset controller to flush Tx.
3661 * (Do the reset outside of interrupt context). */
3662 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3663 adapter->tx_timeout_count++;
3664 schedule_work(&adapter->reset_task);
3665 /* return immediately since reset is imminent */
3666 return;
3667 }
3668 }
3669
3670 /* Force detection of hung controller every watchdog period */
3671 tx_ring->detect_tx_hung = true;
3672 }
3673
3674 /* Cause software interrupt to ensure rx ring is cleaned */
3675 if (adapter->msix_entries) {
3676 u32 eics = 0;
3677 for (i = 0; i < adapter->num_q_vectors; i++) {
3678 struct igb_q_vector *q_vector = adapter->q_vector[i];
3679 eics |= q_vector->eims_value;
3680 }
3681 wr32(E1000_EICS, eics);
3682 } else {
3683 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3684 }
3685
3686 igb_spoof_check(adapter);
3687
3688 /* Reset the timer */
3689 if (!test_bit(__IGB_DOWN, &adapter->state))
3690 mod_timer(&adapter->watchdog_timer,
3691 round_jiffies(jiffies + 2 * HZ));
3692 }
3693
/*
 * Latency classes used by the single-queue dynamic ITR logic
 * (igb_update_itr() selects one, igb_set_itr() maps it to an ITR value).
 */
enum latency_range {
	lowest_latency	= 0,	/* mapped to ~70,000 ints/sec */
	low_latency	= 1,	/* mapped to ~20,000 ints/sec */
	bulk_latency	= 2,	/* mapped to ~4,000 ints/sec */
	latency_invalid	= 255
};
3700
3701 /**
3702 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3703 *
3704 * Stores a new ITR value based on strictly on packet size. This
3705 * algorithm is less sophisticated than that used in igb_update_itr,
3706 * due to the difficulty of synchronizing statistics across multiple
3707 * receive rings. The divisors and thresholds used by this function
3708 * were determined based on theoretical maximum wire speed and testing
3709 * data, in order to minimize response time while increasing bulk
3710 * throughput.
3711 * This functionality is controlled by the InterruptThrottleRate module
3712 * parameter (see igb_param.c)
3713 * NOTE: This function is called only when operating in a multiqueue
3714 * receive environment.
3715 * @q_vector: pointer to q_vector
3716 **/
3717 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3718 {
3719 int new_val = q_vector->itr_val;
3720 int avg_wire_size = 0;
3721 struct igb_adapter *adapter = q_vector->adapter;
3722 struct igb_ring *ring;
3723 unsigned int packets;
3724
3725 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3726 * ints/sec - ITR timer value of 120 ticks.
3727 */
3728 if (adapter->link_speed != SPEED_1000) {
3729 new_val = 976;
3730 goto set_itr_val;
3731 }
3732
3733 ring = q_vector->rx_ring;
3734 if (ring) {
3735 packets = ACCESS_ONCE(ring->total_packets);
3736
3737 if (packets)
3738 avg_wire_size = ring->total_bytes / packets;
3739 }
3740
3741 ring = q_vector->tx_ring;
3742 if (ring) {
3743 packets = ACCESS_ONCE(ring->total_packets);
3744
3745 if (packets)
3746 avg_wire_size = max_t(u32, avg_wire_size,
3747 ring->total_bytes / packets);
3748 }
3749
3750 /* if avg_wire_size isn't set no work was done */
3751 if (!avg_wire_size)
3752 goto clear_counts;
3753
3754 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3755 avg_wire_size += 24;
3756
3757 /* Don't starve jumbo frames */
3758 avg_wire_size = min(avg_wire_size, 3000);
3759
3760 /* Give a little boost to mid-size frames */
3761 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3762 new_val = avg_wire_size / 3;
3763 else
3764 new_val = avg_wire_size / 2;
3765
3766 /* when in itr mode 3 do not exceed 20K ints/sec */
3767 if (adapter->rx_itr_setting == 3 && new_val < 196)
3768 new_val = 196;
3769
3770 set_itr_val:
3771 if (new_val != q_vector->itr_val) {
3772 q_vector->itr_val = new_val;
3773 q_vector->set_itr = 1;
3774 }
3775 clear_counts:
3776 if (q_vector->rx_ring) {
3777 q_vector->rx_ring->total_bytes = 0;
3778 q_vector->rx_ring->total_packets = 0;
3779 }
3780 if (q_vector->tx_ring) {
3781 q_vector->tx_ring->total_bytes = 0;
3782 q_vector->tx_ring->total_packets = 0;
3783 }
3784 }
3785
3786 /**
3787 * igb_update_itr - update the dynamic ITR value based on statistics
3788 * Stores a new ITR value based on packets and byte
3789 * counts during the last interrupt. The advantage of per interrupt
3790 * computation is faster updates and more accurate ITR for the current
3791 * traffic pattern. Constants in this function were computed
3792 * based on theoretical maximum wire speed and thresholds were set based
3793 * on testing data as well as attempting to minimize response time
3794 * while increasing bulk throughput.
3795 * this functionality is controlled by the InterruptThrottleRate module
3796 * parameter (see igb_param.c)
3797 * NOTE: These calculations are only valid when operating in a single-
3798 * queue environment.
3799 * @adapter: pointer to adapter
3800 * @itr_setting: current q_vector->itr_val
3801 * @packets: the number of packets during this measurement interval
3802 * @bytes: the number of bytes during this measurement interval
3803 **/
3804 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3805 int packets, int bytes)
3806 {
3807 unsigned int retval = itr_setting;
3808
3809 if (packets == 0)
3810 goto update_itr_done;
3811
3812 switch (itr_setting) {
3813 case lowest_latency:
3814 /* handle TSO and jumbo frames */
3815 if (bytes/packets > 8000)
3816 retval = bulk_latency;
3817 else if ((packets < 5) && (bytes > 512))
3818 retval = low_latency;
3819 break;
3820 case low_latency: /* 50 usec aka 20000 ints/s */
3821 if (bytes > 10000) {
3822 /* this if handles the TSO accounting */
3823 if (bytes/packets > 8000) {
3824 retval = bulk_latency;
3825 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3826 retval = bulk_latency;
3827 } else if ((packets > 35)) {
3828 retval = lowest_latency;
3829 }
3830 } else if (bytes/packets > 2000) {
3831 retval = bulk_latency;
3832 } else if (packets <= 2 && bytes < 512) {
3833 retval = lowest_latency;
3834 }
3835 break;
3836 case bulk_latency: /* 250 usec aka 4000 ints/s */
3837 if (bytes > 25000) {
3838 if (packets > 35)
3839 retval = low_latency;
3840 } else if (bytes < 1500) {
3841 retval = low_latency;
3842 }
3843 break;
3844 }
3845
3846 update_itr_done:
3847 return retval;
3848 }
3849
3850 static void igb_set_itr(struct igb_adapter *adapter)
3851 {
3852 struct igb_q_vector *q_vector = adapter->q_vector[0];
3853 u16 current_itr;
3854 u32 new_itr = q_vector->itr_val;
3855
3856 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3857 if (adapter->link_speed != SPEED_1000) {
3858 current_itr = 0;
3859 new_itr = 4000;
3860 goto set_itr_now;
3861 }
3862
3863 adapter->rx_itr = igb_update_itr(adapter,
3864 adapter->rx_itr,
3865 q_vector->rx_ring->total_packets,
3866 q_vector->rx_ring->total_bytes);
3867
3868 adapter->tx_itr = igb_update_itr(adapter,
3869 adapter->tx_itr,
3870 q_vector->tx_ring->total_packets,
3871 q_vector->tx_ring->total_bytes);
3872 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3873
3874 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3875 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3876 current_itr = low_latency;
3877
3878 switch (current_itr) {
3879 /* counts and packets in update_itr are dependent on these numbers */
3880 case lowest_latency:
3881 new_itr = 56; /* aka 70,000 ints/sec */
3882 break;
3883 case low_latency:
3884 new_itr = 196; /* aka 20,000 ints/sec */
3885 break;
3886 case bulk_latency:
3887 new_itr = 980; /* aka 4,000 ints/sec */
3888 break;
3889 default:
3890 break;
3891 }
3892
3893 set_itr_now:
3894 q_vector->rx_ring->total_bytes = 0;
3895 q_vector->rx_ring->total_packets = 0;
3896 q_vector->tx_ring->total_bytes = 0;
3897 q_vector->tx_ring->total_packets = 0;
3898
3899 if (new_itr != q_vector->itr_val) {
3900 /* this attempts to bias the interrupt rate towards Bulk
3901 * by adding intermediate steps when interrupt rate is
3902 * increasing */
3903 new_itr = new_itr > q_vector->itr_val ?
3904 max((new_itr * q_vector->itr_val) /
3905 (new_itr + (q_vector->itr_val >> 2)),
3906 new_itr) :
3907 new_itr;
3908 /* Don't write the value here; it resets the adapter's
3909 * internal timer, and causes us to delay far longer than
3910 * we should between interrupts. Instead, we write the ITR
3911 * value at the beginning of the next interrupt so the timing
3912 * ends up being correct.
3913 */
3914 q_vector->itr_val = new_itr;
3915 q_vector->set_itr = 1;
3916 }
3917 }
3918
/*
 * Per-packet Tx flags.  The low bits are individual feature flags; the
 * bits selected by IGB_TX_FLAGS_VLAN_MASK are copied into the context
 * descriptor's vlan_macip_lens field when IGB_TX_FLAGS_VLAN is set.
 */
#define IGB_TX_FLAGS_CSUM		(1 << 0)
#define IGB_TX_FLAGS_VLAN		(1 << 1)
#define IGB_TX_FLAGS_TSO		(1 << 2)
#define IGB_TX_FLAGS_IPV4		(1 << 3)
#define IGB_TX_FLAGS_TSTAMP		(1 << 4)
#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT		16
3926
3927 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3928 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3929 {
3930 struct e1000_adv_tx_context_desc *context_desc;
3931 unsigned int i;
3932 int err;
3933 struct igb_buffer *buffer_info;
3934 u32 info = 0, tu_cmd = 0;
3935 u32 mss_l4len_idx;
3936 u8 l4len;
3937
3938 if (skb_header_cloned(skb)) {
3939 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3940 if (err)
3941 return err;
3942 }
3943
3944 l4len = tcp_hdrlen(skb);
3945 *hdr_len += l4len;
3946
3947 if (skb->protocol == htons(ETH_P_IP)) {
3948 struct iphdr *iph = ip_hdr(skb);
3949 iph->tot_len = 0;
3950 iph->check = 0;
3951 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3952 iph->daddr, 0,
3953 IPPROTO_TCP,
3954 0);
3955 } else if (skb_is_gso_v6(skb)) {
3956 ipv6_hdr(skb)->payload_len = 0;
3957 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3958 &ipv6_hdr(skb)->daddr,
3959 0, IPPROTO_TCP, 0);
3960 }
3961
3962 i = tx_ring->next_to_use;
3963
3964 buffer_info = &tx_ring->buffer_info[i];
3965 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3966 /* VLAN MACLEN IPLEN */
3967 if (tx_flags & IGB_TX_FLAGS_VLAN)
3968 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3969 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3970 *hdr_len += skb_network_offset(skb);
3971 info |= skb_network_header_len(skb);
3972 *hdr_len += skb_network_header_len(skb);
3973 context_desc->vlan_macip_lens = cpu_to_le32(info);
3974
3975 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3976 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3977
3978 if (skb->protocol == htons(ETH_P_IP))
3979 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3980 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3981
3982 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3983
3984 /* MSS L4LEN IDX */
3985 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3986 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3987
3988 /* For 82575, context index must be unique per ring. */
3989 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3990 mss_l4len_idx |= tx_ring->reg_idx << 4;
3991
3992 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3993 context_desc->seqnum_seed = 0;
3994
3995 buffer_info->time_stamp = jiffies;
3996 buffer_info->next_to_watch = i;
3997 buffer_info->dma = 0;
3998 i++;
3999 if (i == tx_ring->count)
4000 i = 0;
4001
4002 tx_ring->next_to_use = i;
4003
4004 return true;
4005 }
4006
4007 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4008 struct sk_buff *skb, u32 tx_flags)
4009 {
4010 struct e1000_adv_tx_context_desc *context_desc;
4011 struct device *dev = tx_ring->dev;
4012 struct igb_buffer *buffer_info;
4013 u32 info = 0, tu_cmd = 0;
4014 unsigned int i;
4015
4016 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4017 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4018 i = tx_ring->next_to_use;
4019 buffer_info = &tx_ring->buffer_info[i];
4020 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4021
4022 if (tx_flags & IGB_TX_FLAGS_VLAN)
4023 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4024
4025 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4026 if (skb->ip_summed == CHECKSUM_PARTIAL)
4027 info |= skb_network_header_len(skb);
4028
4029 context_desc->vlan_macip_lens = cpu_to_le32(info);
4030
4031 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4032
4033 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4034 __be16 protocol;
4035
4036 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4037 const struct vlan_ethhdr *vhdr =
4038 (const struct vlan_ethhdr*)skb->data;
4039
4040 protocol = vhdr->h_vlan_encapsulated_proto;
4041 } else {
4042 protocol = skb->protocol;
4043 }
4044
4045 switch (protocol) {
4046 case cpu_to_be16(ETH_P_IP):
4047 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4048 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4049 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4050 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4051 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4052 break;
4053 case cpu_to_be16(ETH_P_IPV6):
4054 /* XXX what about other V6 headers?? */
4055 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4056 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4057 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4058 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4059 break;
4060 default:
4061 if (unlikely(net_ratelimit()))
4062 dev_warn(dev,
4063 "partial checksum but proto=%x!\n",
4064 skb->protocol);
4065 break;
4066 }
4067 }
4068
4069 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4070 context_desc->seqnum_seed = 0;
4071 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4072 context_desc->mss_l4len_idx =
4073 cpu_to_le32(tx_ring->reg_idx << 4);
4074
4075 buffer_info->time_stamp = jiffies;
4076 buffer_info->next_to_watch = i;
4077 buffer_info->dma = 0;
4078
4079 i++;
4080 if (i == tx_ring->count)
4081 i = 0;
4082 tx_ring->next_to_use = i;
4083
4084 return true;
4085 }
4086 return false;
4087 }
4088
4089 #define IGB_MAX_TXD_PWR 16
4090 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4091
4092 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4093 unsigned int first)
4094 {
4095 struct igb_buffer *buffer_info;
4096 struct device *dev = tx_ring->dev;
4097 unsigned int hlen = skb_headlen(skb);
4098 unsigned int count = 0, i;
4099 unsigned int f;
4100 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4101
4102 i = tx_ring->next_to_use;
4103
4104 buffer_info = &tx_ring->buffer_info[i];
4105 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4106 buffer_info->length = hlen;
4107 /* set time_stamp *before* dma to help avoid a possible race */
4108 buffer_info->time_stamp = jiffies;
4109 buffer_info->next_to_watch = i;
4110 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4111 DMA_TO_DEVICE);
4112 if (dma_mapping_error(dev, buffer_info->dma))
4113 goto dma_error;
4114
4115 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4116 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4117 unsigned int len = frag->size;
4118
4119 count++;
4120 i++;
4121 if (i == tx_ring->count)
4122 i = 0;
4123
4124 buffer_info = &tx_ring->buffer_info[i];
4125 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4126 buffer_info->length = len;
4127 buffer_info->time_stamp = jiffies;
4128 buffer_info->next_to_watch = i;
4129 buffer_info->mapped_as_page = true;
4130 buffer_info->dma = dma_map_page(dev,
4131 frag->page,
4132 frag->page_offset,
4133 len,
4134 DMA_TO_DEVICE);
4135 if (dma_mapping_error(dev, buffer_info->dma))
4136 goto dma_error;
4137
4138 }
4139
4140 tx_ring->buffer_info[i].skb = skb;
4141 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4142 /* multiply data chunks by size of headers */
4143 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4144 tx_ring->buffer_info[i].gso_segs = gso_segs;
4145 tx_ring->buffer_info[first].next_to_watch = i;
4146
4147 return ++count;
4148
4149 dma_error:
4150 dev_err(dev, "TX DMA map failed\n");
4151
4152 /* clear timestamp and dma mappings for failed buffer_info mapping */
4153 buffer_info->dma = 0;
4154 buffer_info->time_stamp = 0;
4155 buffer_info->length = 0;
4156 buffer_info->next_to_watch = 0;
4157 buffer_info->mapped_as_page = false;
4158
4159 /* clear timestamp and dma mappings for remaining portion of packet */
4160 while (count--) {
4161 if (i == 0)
4162 i = tx_ring->count;
4163 i--;
4164 buffer_info = &tx_ring->buffer_info[i];
4165 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4166 }
4167
4168 return 0;
4169 }
4170
4171 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4172 u32 tx_flags, int count, u32 paylen,
4173 u8 hdr_len)
4174 {
4175 union e1000_adv_tx_desc *tx_desc;
4176 struct igb_buffer *buffer_info;
4177 u32 olinfo_status = 0, cmd_type_len;
4178 unsigned int i = tx_ring->next_to_use;
4179
4180 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4181 E1000_ADVTXD_DCMD_DEXT);
4182
4183 if (tx_flags & IGB_TX_FLAGS_VLAN)
4184 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4185
4186 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4187 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4188
4189 if (tx_flags & IGB_TX_FLAGS_TSO) {
4190 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4191
4192 /* insert tcp checksum */
4193 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4194
4195 /* insert ip checksum */
4196 if (tx_flags & IGB_TX_FLAGS_IPV4)
4197 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4198
4199 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4200 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4201 }
4202
4203 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4204 (tx_flags & (IGB_TX_FLAGS_CSUM |
4205 IGB_TX_FLAGS_TSO |
4206 IGB_TX_FLAGS_VLAN)))
4207 olinfo_status |= tx_ring->reg_idx << 4;
4208
4209 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4210
4211 do {
4212 buffer_info = &tx_ring->buffer_info[i];
4213 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4214 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4215 tx_desc->read.cmd_type_len =
4216 cpu_to_le32(cmd_type_len | buffer_info->length);
4217 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4218 count--;
4219 i++;
4220 if (i == tx_ring->count)
4221 i = 0;
4222 } while (count > 0);
4223
4224 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4225 /* Force memory writes to complete before letting h/w
4226 * know there are new descriptors to fetch. (Only
4227 * applicable for weak-ordered memory model archs,
4228 * such as IA-64). */
4229 wmb();
4230
4231 tx_ring->next_to_use = i;
4232 writel(i, tx_ring->tail);
4233 /* we need this if more than one processor can write to our tail
4234 * at a time, it syncronizes IO on IA64/Altix systems */
4235 mmiowb();
4236 }
4237
4238 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4239 {
4240 struct net_device *netdev = tx_ring->netdev;
4241
4242 netif_stop_subqueue(netdev, tx_ring->queue_index);
4243
4244 /* Herbert's original patch had:
4245 * smp_mb__after_netif_stop_queue();
4246 * but since that doesn't exist yet, just open code it. */
4247 smp_mb();
4248
4249 /* We need to check again in a case another CPU has just
4250 * made room available. */
4251 if (igb_desc_unused(tx_ring) < size)
4252 return -EBUSY;
4253
4254 /* A reprieve! */
4255 netif_wake_subqueue(netdev, tx_ring->queue_index);
4256
4257 u64_stats_update_begin(&tx_ring->tx_syncp2);
4258 tx_ring->tx_stats.restart_queue2++;
4259 u64_stats_update_end(&tx_ring->tx_syncp2);
4260
4261 return 0;
4262 }
4263
/*
 * Fast-path check: returns 0 when at least @size descriptors are unused,
 * otherwise defers to __igb_maybe_stop_tx().
 */
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	return (igb_desc_unused(tx_ring) >= size) ?
		0 : __igb_maybe_stop_tx(tx_ring, size);
}
4270
4271 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4272 struct igb_ring *tx_ring)
4273 {
4274 int tso = 0, count;
4275 u32 tx_flags = 0;
4276 u16 first;
4277 u8 hdr_len = 0;
4278
4279 /* need: 1 descriptor per page,
4280 * + 2 desc gap to keep tail from touching head,
4281 * + 1 desc for skb->data,
4282 * + 1 desc for context descriptor,
4283 * otherwise try next time */
4284 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4285 /* this is a hard error */
4286 return NETDEV_TX_BUSY;
4287 }
4288
4289 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4290 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4291 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4292 }
4293
4294 if (vlan_tx_tag_present(skb)) {
4295 tx_flags |= IGB_TX_FLAGS_VLAN;
4296 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4297 }
4298
4299 if (skb->protocol == htons(ETH_P_IP))
4300 tx_flags |= IGB_TX_FLAGS_IPV4;
4301
4302 first = tx_ring->next_to_use;
4303 if (skb_is_gso(skb)) {
4304 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4305
4306 if (tso < 0) {
4307 dev_kfree_skb_any(skb);
4308 return NETDEV_TX_OK;
4309 }
4310 }
4311
4312 if (tso)
4313 tx_flags |= IGB_TX_FLAGS_TSO;
4314 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4315 (skb->ip_summed == CHECKSUM_PARTIAL))
4316 tx_flags |= IGB_TX_FLAGS_CSUM;
4317
4318 /*
4319 * count reflects descriptors mapped, if 0 or less then mapping error
4320 * has occurred and we need to rewind the descriptor queue
4321 */
4322 count = igb_tx_map_adv(tx_ring, skb, first);
4323 if (!count) {
4324 dev_kfree_skb_any(skb);
4325 tx_ring->buffer_info[first].time_stamp = 0;
4326 tx_ring->next_to_use = first;
4327 return NETDEV_TX_OK;
4328 }
4329
4330 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4331
4332 /* Make sure there is space in the ring for the next send. */
4333 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4334
4335 return NETDEV_TX_OK;
4336 }
4337
4338 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4339 struct net_device *netdev)
4340 {
4341 struct igb_adapter *adapter = netdev_priv(netdev);
4342 struct igb_ring *tx_ring;
4343 int r_idx = 0;
4344
4345 if (test_bit(__IGB_DOWN, &adapter->state)) {
4346 dev_kfree_skb_any(skb);
4347 return NETDEV_TX_OK;
4348 }
4349
4350 if (skb->len <= 0) {
4351 dev_kfree_skb_any(skb);
4352 return NETDEV_TX_OK;
4353 }
4354
4355 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4356 tx_ring = adapter->multi_tx_table[r_idx];
4357
4358 /* This goes back to the question of how to logically map a tx queue
4359 * to a flow. Right now, performance is impacted slightly negatively
4360 * if using multiple tx queues. If the stack breaks away from a
4361 * single qdisc implementation, we can look at this again. */
4362 return igb_xmit_frame_ring_adv(skb, tx_ring);
4363 }
4364
4365 /**
4366 * igb_tx_timeout - Respond to a Tx Hang
4367 * @netdev: network interface device structure
4368 **/
4369 static void igb_tx_timeout(struct net_device *netdev)
4370 {
4371 struct igb_adapter *adapter = netdev_priv(netdev);
4372 struct e1000_hw *hw = &adapter->hw;
4373
4374 /* Do the reset outside of interrupt context */
4375 adapter->tx_timeout_count++;
4376
4377 if (hw->mac.type == e1000_82580)
4378 hw->dev_spec._82575.global_device_reset = true;
4379
4380 schedule_work(&adapter->reset_task);
4381 wr32(E1000_EICS,
4382 (adapter->eims_enable_mask & ~adapter->eims_other));
4383 }
4384
4385 static void igb_reset_task(struct work_struct *work)
4386 {
4387 struct igb_adapter *adapter;
4388 adapter = container_of(work, struct igb_adapter, reset_task);
4389
4390 igb_dump(adapter);
4391 netdev_err(adapter->netdev, "Reset adapter\n");
4392 igb_reinit_locked(adapter);
4393 }
4394
4395 /**
4396 * igb_get_stats64 - Get System Network Statistics
4397 * @netdev: network interface device structure
4398 * @stats: rtnl_link_stats64 pointer
4399 *
4400 **/
4401 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4402 struct rtnl_link_stats64 *stats)
4403 {
4404 struct igb_adapter *adapter = netdev_priv(netdev);
4405
4406 spin_lock(&adapter->stats64_lock);
4407 igb_update_stats(adapter, &adapter->stats64);
4408 memcpy(stats, &adapter->stats64, sizeof(*stats));
4409 spin_unlock(&adapter->stats64_lock);
4410
4411 return stats;
4412 }
4413
4414 /**
4415 * igb_change_mtu - Change the Maximum Transfer Unit
4416 * @netdev: network interface device structure
4417 * @new_mtu: new value for maximum frame size
4418 *
4419 * Returns 0 on success, negative on failure
4420 **/
4421 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4422 {
4423 struct igb_adapter *adapter = netdev_priv(netdev);
4424 struct pci_dev *pdev = adapter->pdev;
4425 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4426 u32 rx_buffer_len, i;
4427
4428 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4429 dev_err(&pdev->dev, "Invalid MTU setting\n");
4430 return -EINVAL;
4431 }
4432
4433 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4434 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4435 return -EINVAL;
4436 }
4437
4438 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4439 msleep(1);
4440
4441 /* igb_down has a dependency on max_frame_size */
4442 adapter->max_frame_size = max_frame;
4443
4444 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4445 * means we reserve 2 more, this pushes us to allocate from the next
4446 * larger slab size.
4447 * i.e. RXBUFFER_2048 --> size-4096 slab
4448 */
4449
4450 if (adapter->hw.mac.type == e1000_82580)
4451 max_frame += IGB_TS_HDR_LEN;
4452
4453 if (max_frame <= IGB_RXBUFFER_1024)
4454 rx_buffer_len = IGB_RXBUFFER_1024;
4455 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4456 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4457 else
4458 rx_buffer_len = IGB_RXBUFFER_128;
4459
4460 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4461 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4462 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4463
4464 if ((adapter->hw.mac.type == e1000_82580) &&
4465 (rx_buffer_len == IGB_RXBUFFER_128))
4466 rx_buffer_len += IGB_RXBUFFER_64;
4467
4468 if (netif_running(netdev))
4469 igb_down(adapter);
4470
4471 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4472 netdev->mtu, new_mtu);
4473 netdev->mtu = new_mtu;
4474
4475 for (i = 0; i < adapter->num_rx_queues; i++)
4476 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4477
4478 if (netif_running(netdev))
4479 igb_up(adapter);
4480 else
4481 igb_reset(adapter);
4482
4483 clear_bit(__IGB_RESETTING, &adapter->state);
4484
4485 return 0;
4486 }
4487
4488 /**
4489 * igb_update_stats - Update the board statistics counters
4490 * @adapter: board private structure
4491 **/
4492
4493 void igb_update_stats(struct igb_adapter *adapter,
4494 struct rtnl_link_stats64 *net_stats)
4495 {
4496 struct e1000_hw *hw = &adapter->hw;
4497 struct pci_dev *pdev = adapter->pdev;
4498 u32 reg, mpc;
4499 u16 phy_tmp;
4500 int i;
4501 u64 bytes, packets;
4502 unsigned int start;
4503 u64 _bytes, _packets;
4504
4505 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4506
4507 /*
4508 * Prevent stats update while adapter is being reset, or if the pci
4509 * connection is down.
4510 */
4511 if (adapter->link_speed == 0)
4512 return;
4513 if (pci_channel_offline(pdev))
4514 return;
4515
4516 bytes = 0;
4517 packets = 0;
4518 for (i = 0; i < adapter->num_rx_queues; i++) {
4519 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4520 struct igb_ring *ring = adapter->rx_ring[i];
4521
4522 ring->rx_stats.drops += rqdpc_tmp;
4523 net_stats->rx_fifo_errors += rqdpc_tmp;
4524
4525 do {
4526 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4527 _bytes = ring->rx_stats.bytes;
4528 _packets = ring->rx_stats.packets;
4529 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4530 bytes += _bytes;
4531 packets += _packets;
4532 }
4533
4534 net_stats->rx_bytes = bytes;
4535 net_stats->rx_packets = packets;
4536
4537 bytes = 0;
4538 packets = 0;
4539 for (i = 0; i < adapter->num_tx_queues; i++) {
4540 struct igb_ring *ring = adapter->tx_ring[i];
4541 do {
4542 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4543 _bytes = ring->tx_stats.bytes;
4544 _packets = ring->tx_stats.packets;
4545 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4546 bytes += _bytes;
4547 packets += _packets;
4548 }
4549 net_stats->tx_bytes = bytes;
4550 net_stats->tx_packets = packets;
4551
4552 /* read stats registers */
4553 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4554 adapter->stats.gprc += rd32(E1000_GPRC);
4555 adapter->stats.gorc += rd32(E1000_GORCL);
4556 rd32(E1000_GORCH); /* clear GORCL */
4557 adapter->stats.bprc += rd32(E1000_BPRC);
4558 adapter->stats.mprc += rd32(E1000_MPRC);
4559 adapter->stats.roc += rd32(E1000_ROC);
4560
4561 adapter->stats.prc64 += rd32(E1000_PRC64);
4562 adapter->stats.prc127 += rd32(E1000_PRC127);
4563 adapter->stats.prc255 += rd32(E1000_PRC255);
4564 adapter->stats.prc511 += rd32(E1000_PRC511);
4565 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4566 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4567 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4568 adapter->stats.sec += rd32(E1000_SEC);
4569
4570 mpc = rd32(E1000_MPC);
4571 adapter->stats.mpc += mpc;
4572 net_stats->rx_fifo_errors += mpc;
4573 adapter->stats.scc += rd32(E1000_SCC);
4574 adapter->stats.ecol += rd32(E1000_ECOL);
4575 adapter->stats.mcc += rd32(E1000_MCC);
4576 adapter->stats.latecol += rd32(E1000_LATECOL);
4577 adapter->stats.dc += rd32(E1000_DC);
4578 adapter->stats.rlec += rd32(E1000_RLEC);
4579 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4580 adapter->stats.xontxc += rd32(E1000_XONTXC);
4581 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4582 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4583 adapter->stats.fcruc += rd32(E1000_FCRUC);
4584 adapter->stats.gptc += rd32(E1000_GPTC);
4585 adapter->stats.gotc += rd32(E1000_GOTCL);
4586 rd32(E1000_GOTCH); /* clear GOTCL */
4587 adapter->stats.rnbc += rd32(E1000_RNBC);
4588 adapter->stats.ruc += rd32(E1000_RUC);
4589 adapter->stats.rfc += rd32(E1000_RFC);
4590 adapter->stats.rjc += rd32(E1000_RJC);
4591 adapter->stats.tor += rd32(E1000_TORH);
4592 adapter->stats.tot += rd32(E1000_TOTH);
4593 adapter->stats.tpr += rd32(E1000_TPR);
4594
4595 adapter->stats.ptc64 += rd32(E1000_PTC64);
4596 adapter->stats.ptc127 += rd32(E1000_PTC127);
4597 adapter->stats.ptc255 += rd32(E1000_PTC255);
4598 adapter->stats.ptc511 += rd32(E1000_PTC511);
4599 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4600 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4601
4602 adapter->stats.mptc += rd32(E1000_MPTC);
4603 adapter->stats.bptc += rd32(E1000_BPTC);
4604
4605 adapter->stats.tpt += rd32(E1000_TPT);
4606 adapter->stats.colc += rd32(E1000_COLC);
4607
4608 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4609 /* read internal phy specific stats */
4610 reg = rd32(E1000_CTRL_EXT);
4611 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4612 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4613 adapter->stats.tncrs += rd32(E1000_TNCRS);
4614 }
4615
4616 adapter->stats.tsctc += rd32(E1000_TSCTC);
4617 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4618
4619 adapter->stats.iac += rd32(E1000_IAC);
4620 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4621 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4622 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4623 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4624 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4625 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4626 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4627 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4628
4629 /* Fill out the OS statistics structure */
4630 net_stats->multicast = adapter->stats.mprc;
4631 net_stats->collisions = adapter->stats.colc;
4632
4633 /* Rx Errors */
4634
4635 /* RLEC on some newer hardware can be incorrect so build
4636 * our own version based on RUC and ROC */
4637 net_stats->rx_errors = adapter->stats.rxerrc +
4638 adapter->stats.crcerrs + adapter->stats.algnerrc +
4639 adapter->stats.ruc + adapter->stats.roc +
4640 adapter->stats.cexterr;
4641 net_stats->rx_length_errors = adapter->stats.ruc +
4642 adapter->stats.roc;
4643 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4644 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4645 net_stats->rx_missed_errors = adapter->stats.mpc;
4646
4647 /* Tx Errors */
4648 net_stats->tx_errors = adapter->stats.ecol +
4649 adapter->stats.latecol;
4650 net_stats->tx_aborted_errors = adapter->stats.ecol;
4651 net_stats->tx_window_errors = adapter->stats.latecol;
4652 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4653
4654 /* Tx Dropped needs to be maintained elsewhere */
4655
4656 /* Phy Stats */
4657 if (hw->phy.media_type == e1000_media_type_copper) {
4658 if ((adapter->link_speed == SPEED_1000) &&
4659 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4660 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4661 adapter->phy_stats.idle_errors += phy_tmp;
4662 }
4663 }
4664
4665 /* Management Stats */
4666 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4667 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4668 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4669
4670 /* OS2BMC Stats */
4671 reg = rd32(E1000_MANC);
4672 if (reg & E1000_MANC_EN_BMC2OS) {
4673 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4674 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4675 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4676 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4677 }
4678 }
4679
4680 static irqreturn_t igb_msix_other(int irq, void *data)
4681 {
4682 struct igb_adapter *adapter = data;
4683 struct e1000_hw *hw = &adapter->hw;
4684 u32 icr = rd32(E1000_ICR);
4685 /* reading ICR causes bit 31 of EICR to be cleared */
4686
4687 if (icr & E1000_ICR_DRSTA)
4688 schedule_work(&adapter->reset_task);
4689
4690 if (icr & E1000_ICR_DOUTSYNC) {
4691 /* HW is reporting DMA is out of sync */
4692 adapter->stats.doosync++;
4693 /* The DMA Out of Sync is also indication of a spoof event
4694 * in IOV mode. Check the Wrong VM Behavior register to
4695 * see if it is really a spoof event. */
4696 igb_check_wvbr(adapter);
4697 }
4698
4699 /* Check for a mailbox event */
4700 if (icr & E1000_ICR_VMMB)
4701 igb_msg_task(adapter);
4702
4703 if (icr & E1000_ICR_LSC) {
4704 hw->mac.get_link_status = 1;
4705 /* guard against interrupt when we're going down */
4706 if (!test_bit(__IGB_DOWN, &adapter->state))
4707 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4708 }
4709
4710 if (adapter->vfs_allocated_count)
4711 wr32(E1000_IMS, E1000_IMS_LSC |
4712 E1000_IMS_VMMB |
4713 E1000_IMS_DOUTSYNC);
4714 else
4715 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4716 wr32(E1000_EIMS, adapter->eims_other);
4717
4718 return IRQ_HANDLED;
4719 }
4720
4721 static void igb_write_itr(struct igb_q_vector *q_vector)
4722 {
4723 struct igb_adapter *adapter = q_vector->adapter;
4724 u32 itr_val = q_vector->itr_val & 0x7FFC;
4725
4726 if (!q_vector->set_itr)
4727 return;
4728
4729 if (!itr_val)
4730 itr_val = 0x4;
4731
4732 if (adapter->hw.mac.type == e1000_82575)
4733 itr_val |= itr_val << 16;
4734 else
4735 itr_val |= 0x8000000;
4736
4737 writel(itr_val, q_vector->itr_register);
4738 q_vector->set_itr = 0;
4739 }
4740
4741 static irqreturn_t igb_msix_ring(int irq, void *data)
4742 {
4743 struct igb_q_vector *q_vector = data;
4744
4745 /* Write the ITR value calculated from the previous interrupt. */
4746 igb_write_itr(q_vector);
4747
4748 napi_schedule(&q_vector->napi);
4749
4750 return IRQ_HANDLED;
4751 }
4752
4753 #ifdef CONFIG_IGB_DCA
4754 static void igb_update_dca(struct igb_q_vector *q_vector)
4755 {
4756 struct igb_adapter *adapter = q_vector->adapter;
4757 struct e1000_hw *hw = &adapter->hw;
4758 int cpu = get_cpu();
4759
4760 if (q_vector->cpu == cpu)
4761 goto out_no_update;
4762
4763 if (q_vector->tx_ring) {
4764 int q = q_vector->tx_ring->reg_idx;
4765 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4766 if (hw->mac.type == e1000_82575) {
4767 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4768 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4769 } else {
4770 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4771 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4772 E1000_DCA_TXCTRL_CPUID_SHIFT;
4773 }
4774 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4775 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4776 }
4777 if (q_vector->rx_ring) {
4778 int q = q_vector->rx_ring->reg_idx;
4779 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4780 if (hw->mac.type == e1000_82575) {
4781 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4782 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4783 } else {
4784 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4785 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4786 E1000_DCA_RXCTRL_CPUID_SHIFT;
4787 }
4788 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4789 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4790 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4791 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4792 }
4793 q_vector->cpu = cpu;
4794 out_no_update:
4795 put_cpu();
4796 }
4797
4798 static void igb_setup_dca(struct igb_adapter *adapter)
4799 {
4800 struct e1000_hw *hw = &adapter->hw;
4801 int i;
4802
4803 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4804 return;
4805
4806 /* Always use CB2 mode, difference is masked in the CB driver. */
4807 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4808
4809 for (i = 0; i < adapter->num_q_vectors; i++) {
4810 adapter->q_vector[i]->cpu = -1;
4811 igb_update_dca(adapter->q_vector[i]);
4812 }
4813 }
4814
4815 static int __igb_notify_dca(struct device *dev, void *data)
4816 {
4817 struct net_device *netdev = dev_get_drvdata(dev);
4818 struct igb_adapter *adapter = netdev_priv(netdev);
4819 struct pci_dev *pdev = adapter->pdev;
4820 struct e1000_hw *hw = &adapter->hw;
4821 unsigned long event = *(unsigned long *)data;
4822
4823 switch (event) {
4824 case DCA_PROVIDER_ADD:
4825 /* if already enabled, don't do it again */
4826 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4827 break;
4828 if (dca_add_requester(dev) == 0) {
4829 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4830 dev_info(&pdev->dev, "DCA enabled\n");
4831 igb_setup_dca(adapter);
4832 break;
4833 }
4834 /* Fall Through since DCA is disabled. */
4835 case DCA_PROVIDER_REMOVE:
4836 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4837 /* without this a class_device is left
4838 * hanging around in the sysfs model */
4839 dca_remove_requester(dev);
4840 dev_info(&pdev->dev, "DCA disabled\n");
4841 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4842 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4843 }
4844 break;
4845 }
4846
4847 return 0;
4848 }
4849
4850 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4851 void *p)
4852 {
4853 int ret_val;
4854
4855 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4856 __igb_notify_dca);
4857
4858 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4859 }
4860 #endif /* CONFIG_IGB_DCA */
4861
4862 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4863 {
4864 struct e1000_hw *hw = &adapter->hw;
4865 u32 ping;
4866 int i;
4867
4868 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4869 ping = E1000_PF_CONTROL_MSG;
4870 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4871 ping |= E1000_VT_MSGTYPE_CTS;
4872 igb_write_mbx(hw, &ping, 1, i);
4873 }
4874 }
4875
4876 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4877 {
4878 struct e1000_hw *hw = &adapter->hw;
4879 u32 vmolr = rd32(E1000_VMOLR(vf));
4880 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4881
4882 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4883 IGB_VF_FLAG_MULTI_PROMISC);
4884 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4885
4886 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4887 vmolr |= E1000_VMOLR_MPME;
4888 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4889 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4890 } else {
4891 /*
4892 * if we have hashes and we are clearing a multicast promisc
4893 * flag we need to write the hashes to the MTA as this step
4894 * was previously skipped
4895 */
4896 if (vf_data->num_vf_mc_hashes > 30) {
4897 vmolr |= E1000_VMOLR_MPME;
4898 } else if (vf_data->num_vf_mc_hashes) {
4899 int j;
4900 vmolr |= E1000_VMOLR_ROMPE;
4901 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4902 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4903 }
4904 }
4905
4906 wr32(E1000_VMOLR(vf), vmolr);
4907
4908 /* there are flags left unprocessed, likely not supported */
4909 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4910 return -EINVAL;
4911
4912 return 0;
4913
4914 }
4915
4916 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4917 u32 *msgbuf, u32 vf)
4918 {
4919 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4920 u16 *hash_list = (u16 *)&msgbuf[1];
4921 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4922 int i;
4923
4924 /* salt away the number of multicast addresses assigned
4925 * to this VF for later use to restore when the PF multi cast
4926 * list changes
4927 */
4928 vf_data->num_vf_mc_hashes = n;
4929
4930 /* only up to 30 hash values supported */
4931 if (n > 30)
4932 n = 30;
4933
4934 /* store the hashes for later use */
4935 for (i = 0; i < n; i++)
4936 vf_data->vf_mc_hashes[i] = hash_list[i];
4937
4938 /* Flush and reset the mta with the new values */
4939 igb_set_rx_mode(adapter->netdev);
4940
4941 return 0;
4942 }
4943
4944 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4945 {
4946 struct e1000_hw *hw = &adapter->hw;
4947 struct vf_data_storage *vf_data;
4948 int i, j;
4949
4950 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4951 u32 vmolr = rd32(E1000_VMOLR(i));
4952 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4953
4954 vf_data = &adapter->vf_data[i];
4955
4956 if ((vf_data->num_vf_mc_hashes > 30) ||
4957 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4958 vmolr |= E1000_VMOLR_MPME;
4959 } else if (vf_data->num_vf_mc_hashes) {
4960 vmolr |= E1000_VMOLR_ROMPE;
4961 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4962 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4963 }
4964 wr32(E1000_VMOLR(i), vmolr);
4965 }
4966 }
4967
4968 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4969 {
4970 struct e1000_hw *hw = &adapter->hw;
4971 u32 pool_mask, reg, vid;
4972 int i;
4973
4974 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4975
4976 /* Find the vlan filter for this id */
4977 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4978 reg = rd32(E1000_VLVF(i));
4979
4980 /* remove the vf from the pool */
4981 reg &= ~pool_mask;
4982
4983 /* if pool is empty then remove entry from vfta */
4984 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4985 (reg & E1000_VLVF_VLANID_ENABLE)) {
4986 reg = 0;
4987 vid = reg & E1000_VLVF_VLANID_MASK;
4988 igb_vfta_set(hw, vid, false);
4989 }
4990
4991 wr32(E1000_VLVF(i), reg);
4992 }
4993
4994 adapter->vf_data[vf].vlans_enabled = 0;
4995 }
4996
4997 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4998 {
4999 struct e1000_hw *hw = &adapter->hw;
5000 u32 reg, i;
5001
5002 /* The vlvf table only exists on 82576 hardware and newer */
5003 if (hw->mac.type < e1000_82576)
5004 return -1;
5005
5006 /* we only need to do this if VMDq is enabled */
5007 if (!adapter->vfs_allocated_count)
5008 return -1;
5009
5010 /* Find the vlan filter for this id */
5011 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5012 reg = rd32(E1000_VLVF(i));
5013 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5014 vid == (reg & E1000_VLVF_VLANID_MASK))
5015 break;
5016 }
5017
5018 if (add) {
5019 if (i == E1000_VLVF_ARRAY_SIZE) {
5020 /* Did not find a matching VLAN ID entry that was
5021 * enabled. Search for a free filter entry, i.e.
5022 * one without the enable bit set
5023 */
5024 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5025 reg = rd32(E1000_VLVF(i));
5026 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5027 break;
5028 }
5029 }
5030 if (i < E1000_VLVF_ARRAY_SIZE) {
5031 /* Found an enabled/available entry */
5032 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5033
5034 /* if !enabled we need to set this up in vfta */
5035 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5036 /* add VID to filter table */
5037 igb_vfta_set(hw, vid, true);
5038 reg |= E1000_VLVF_VLANID_ENABLE;
5039 }
5040 reg &= ~E1000_VLVF_VLANID_MASK;
5041 reg |= vid;
5042 wr32(E1000_VLVF(i), reg);
5043
5044 /* do not modify RLPML for PF devices */
5045 if (vf >= adapter->vfs_allocated_count)
5046 return 0;
5047
5048 if (!adapter->vf_data[vf].vlans_enabled) {
5049 u32 size;
5050 reg = rd32(E1000_VMOLR(vf));
5051 size = reg & E1000_VMOLR_RLPML_MASK;
5052 size += 4;
5053 reg &= ~E1000_VMOLR_RLPML_MASK;
5054 reg |= size;
5055 wr32(E1000_VMOLR(vf), reg);
5056 }
5057
5058 adapter->vf_data[vf].vlans_enabled++;
5059 return 0;
5060 }
5061 } else {
5062 if (i < E1000_VLVF_ARRAY_SIZE) {
5063 /* remove vf from the pool */
5064 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5065 /* if pool is empty then remove entry from vfta */
5066 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5067 reg = 0;
5068 igb_vfta_set(hw, vid, false);
5069 }
5070 wr32(E1000_VLVF(i), reg);
5071
5072 /* do not modify RLPML for PF devices */
5073 if (vf >= adapter->vfs_allocated_count)
5074 return 0;
5075
5076 adapter->vf_data[vf].vlans_enabled--;
5077 if (!adapter->vf_data[vf].vlans_enabled) {
5078 u32 size;
5079 reg = rd32(E1000_VMOLR(vf));
5080 size = reg & E1000_VMOLR_RLPML_MASK;
5081 size -= 4;
5082 reg &= ~E1000_VMOLR_RLPML_MASK;
5083 reg |= size;
5084 wr32(E1000_VMOLR(vf), reg);
5085 }
5086 }
5087 }
5088 return 0;
5089 }
5090
5091 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5092 {
5093 struct e1000_hw *hw = &adapter->hw;
5094
5095 if (vid)
5096 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5097 else
5098 wr32(E1000_VMVIR(vf), 0);
5099 }
5100
5101 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5102 int vf, u16 vlan, u8 qos)
5103 {
5104 int err = 0;
5105 struct igb_adapter *adapter = netdev_priv(netdev);
5106
5107 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5108 return -EINVAL;
5109 if (vlan || qos) {
5110 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5111 if (err)
5112 goto out;
5113 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5114 igb_set_vmolr(adapter, vf, !vlan);
5115 adapter->vf_data[vf].pf_vlan = vlan;
5116 adapter->vf_data[vf].pf_qos = qos;
5117 dev_info(&adapter->pdev->dev,
5118 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5119 if (test_bit(__IGB_DOWN, &adapter->state)) {
5120 dev_warn(&adapter->pdev->dev,
5121 "The VF VLAN has been set,"
5122 " but the PF device is not up.\n");
5123 dev_warn(&adapter->pdev->dev,
5124 "Bring the PF device up before"
5125 " attempting to use the VF device.\n");
5126 }
5127 } else {
5128 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5129 false, vf);
5130 igb_set_vmvir(adapter, vlan, vf);
5131 igb_set_vmolr(adapter, vf, true);
5132 adapter->vf_data[vf].pf_vlan = 0;
5133 adapter->vf_data[vf].pf_qos = 0;
5134 }
5135 out:
5136 return err;
5137 }
5138
5139 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5140 {
5141 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5142 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5143
5144 return igb_vlvf_set(adapter, vid, add, vf);
5145 }
5146
5147 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5148 {
5149 /* clear flags - except flag that indicates PF has set the MAC */
5150 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5151 adapter->vf_data[vf].last_nack = jiffies;
5152
5153 /* reset offloads to defaults */
5154 igb_set_vmolr(adapter, vf, true);
5155
5156 /* reset vlans for device */
5157 igb_clear_vf_vfta(adapter, vf);
5158 if (adapter->vf_data[vf].pf_vlan)
5159 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5160 adapter->vf_data[vf].pf_vlan,
5161 adapter->vf_data[vf].pf_qos);
5162 else
5163 igb_clear_vf_vfta(adapter, vf);
5164
5165 /* reset multicast table array for vf */
5166 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5167
5168 /* Flush and reset the mta with the new values */
5169 igb_set_rx_mode(adapter->netdev);
5170 }
5171
5172 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5173 {
5174 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5175
5176 /* generate a new mac address as we were hotplug removed/added */
5177 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5178 random_ether_addr(vf_mac);
5179
5180 /* process remaining reset events */
5181 igb_vf_reset(adapter, vf);
5182 }
5183
5184 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5185 {
5186 struct e1000_hw *hw = &adapter->hw;
5187 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5188 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5189 u32 reg, msgbuf[3];
5190 u8 *addr = (u8 *)(&msgbuf[1]);
5191
5192 /* process all the same items cleared in a function level reset */
5193 igb_vf_reset(adapter, vf);
5194
5195 /* set vf mac address */
5196 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5197
5198 /* enable transmit and receive for vf */
5199 reg = rd32(E1000_VFTE);
5200 wr32(E1000_VFTE, reg | (1 << vf));
5201 reg = rd32(E1000_VFRE);
5202 wr32(E1000_VFRE, reg | (1 << vf));
5203
5204 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5205
5206 /* reply to reset with ack and vf mac address */
5207 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5208 memcpy(addr, vf_mac, 6);
5209 igb_write_mbx(hw, msgbuf, 3, vf);
5210 }
5211
5212 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5213 {
5214 /*
5215 * The VF MAC Address is stored in a packed array of bytes
5216 * starting at the second 32 bit word of the msg array
5217 */
5218 unsigned char *addr = (char *)&msg[1];
5219 int err = -1;
5220
5221 if (is_valid_ether_addr(addr))
5222 err = igb_set_vf_mac(adapter, vf, addr);
5223
5224 return err;
5225 }
5226
5227 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5228 {
5229 struct e1000_hw *hw = &adapter->hw;
5230 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5231 u32 msg = E1000_VT_MSGTYPE_NACK;
5232
5233 /* if device isn't clear to send it shouldn't be reading either */
5234 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5235 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5236 igb_write_mbx(hw, &msg, 1, vf);
5237 vf_data->last_nack = jiffies;
5238 }
5239 }
5240
5241 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5242 {
5243 struct pci_dev *pdev = adapter->pdev;
5244 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5245 struct e1000_hw *hw = &adapter->hw;
5246 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5247 s32 retval;
5248
5249 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5250
5251 if (retval) {
5252 /* if receive failed revoke VF CTS stats and restart init */
5253 dev_err(&pdev->dev, "Error receiving message from VF\n");
5254 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5255 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5256 return;
5257 goto out;
5258 }
5259
5260 /* this is a message we already processed, do nothing */
5261 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5262 return;
5263
5264 /*
5265 * until the vf completes a reset it should not be
5266 * allowed to start any configuration.
5267 */
5268
5269 if (msgbuf[0] == E1000_VF_RESET) {
5270 igb_vf_reset_msg(adapter, vf);
5271 return;
5272 }
5273
5274 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5275 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5276 return;
5277 retval = -1;
5278 goto out;
5279 }
5280
5281 switch ((msgbuf[0] & 0xFFFF)) {
5282 case E1000_VF_SET_MAC_ADDR:
5283 retval = -EINVAL;
5284 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5285 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5286 else
5287 dev_warn(&pdev->dev,
5288 "VF %d attempted to override administratively "
5289 "set MAC address\nReload the VF driver to "
5290 "resume operations\n", vf);
5291 break;
5292 case E1000_VF_SET_PROMISC:
5293 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5294 break;
5295 case E1000_VF_SET_MULTICAST:
5296 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5297 break;
5298 case E1000_VF_SET_LPE:
5299 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5300 break;
5301 case E1000_VF_SET_VLAN:
5302 retval = -1;
5303 if (vf_data->pf_vlan)
5304 dev_warn(&pdev->dev,
5305 "VF %d attempted to override administratively "
5306 "set VLAN tag\nReload the VF driver to "
5307 "resume operations\n", vf);
5308 else
5309 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5310 break;
5311 default:
5312 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5313 retval = -1;
5314 break;
5315 }
5316
5317 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5318 out:
5319 /* notify the VF of the results of what it sent us */
5320 if (retval)
5321 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5322 else
5323 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5324
5325 igb_write_mbx(hw, msgbuf, 1, vf);
5326 }
5327
5328 static void igb_msg_task(struct igb_adapter *adapter)
5329 {
5330 struct e1000_hw *hw = &adapter->hw;
5331 u32 vf;
5332
5333 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5334 /* process any reset requests */
5335 if (!igb_check_for_rst(hw, vf))
5336 igb_vf_reset_event(adapter, vf);
5337
5338 /* process any messages pending */
5339 if (!igb_check_for_msg(hw, vf))
5340 igb_rcv_msg_from_vf(adapter, vf);
5341
5342 /* process any acks */
5343 if (!igb_check_for_ack(hw, vf))
5344 igb_rcv_ack_from_vf(adapter, vf);
5345 }
5346 }
5347
5348 /**
5349 * igb_set_uta - Set unicast filter table address
5350 * @adapter: board private structure
5351 *
5352 * The unicast table address is a register array of 32-bit registers.
5353 * The table is meant to be used in a way similar to how the MTA is used
5354 * however due to certain limitations in the hardware it is necessary to
5355 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5356 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5357 **/
5358 static void igb_set_uta(struct igb_adapter *adapter)
5359 {
5360 struct e1000_hw *hw = &adapter->hw;
5361 int i;
5362
5363 /* The UTA table only exists on 82576 hardware and newer */
5364 if (hw->mac.type < e1000_82576)
5365 return;
5366
5367 /* we only need to do this if VMDq is enabled */
5368 if (!adapter->vfs_allocated_count)
5369 return;
5370
5371 for (i = 0; i < hw->mac.uta_reg_count; i++)
5372 array_wr32(E1000_UTA, i, ~0);
5373 }
5374
5375 /**
5376 * igb_intr_msi - Interrupt Handler
5377 * @irq: interrupt number
5378 * @data: pointer to a network interface device structure
5379 **/
5380 static irqreturn_t igb_intr_msi(int irq, void *data)
5381 {
5382 struct igb_adapter *adapter = data;
5383 struct igb_q_vector *q_vector = adapter->q_vector[0];
5384 struct e1000_hw *hw = &adapter->hw;
5385 /* read ICR disables interrupts using IAM */
5386 u32 icr = rd32(E1000_ICR);
5387
5388 igb_write_itr(q_vector);
5389
5390 if (icr & E1000_ICR_DRSTA)
5391 schedule_work(&adapter->reset_task);
5392
5393 if (icr & E1000_ICR_DOUTSYNC) {
5394 /* HW is reporting DMA is out of sync */
5395 adapter->stats.doosync++;
5396 }
5397
5398 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5399 hw->mac.get_link_status = 1;
5400 if (!test_bit(__IGB_DOWN, &adapter->state))
5401 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5402 }
5403
5404 napi_schedule(&q_vector->napi);
5405
5406 return IRQ_HANDLED;
5407 }
5408
5409 /**
5410 * igb_intr - Legacy Interrupt Handler
5411 * @irq: interrupt number
5412 * @data: pointer to a network interface device structure
5413 **/
5414 static irqreturn_t igb_intr(int irq, void *data)
5415 {
5416 struct igb_adapter *adapter = data;
5417 struct igb_q_vector *q_vector = adapter->q_vector[0];
5418 struct e1000_hw *hw = &adapter->hw;
5419 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5420 * need for the IMC write */
5421 u32 icr = rd32(E1000_ICR);
5422 if (!icr)
5423 return IRQ_NONE; /* Not our interrupt */
5424
5425 igb_write_itr(q_vector);
5426
5427 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5428 * not set, then the adapter didn't send an interrupt */
5429 if (!(icr & E1000_ICR_INT_ASSERTED))
5430 return IRQ_NONE;
5431
5432 if (icr & E1000_ICR_DRSTA)
5433 schedule_work(&adapter->reset_task);
5434
5435 if (icr & E1000_ICR_DOUTSYNC) {
5436 /* HW is reporting DMA is out of sync */
5437 adapter->stats.doosync++;
5438 }
5439
5440 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5441 hw->mac.get_link_status = 1;
5442 /* guard against interrupt when we're going down */
5443 if (!test_bit(__IGB_DOWN, &adapter->state))
5444 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5445 }
5446
5447 napi_schedule(&q_vector->napi);
5448
5449 return IRQ_HANDLED;
5450 }
5451
5452 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5453 {
5454 struct igb_adapter *adapter = q_vector->adapter;
5455 struct e1000_hw *hw = &adapter->hw;
5456
5457 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5458 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5459 if (!adapter->msix_entries)
5460 igb_set_itr(adapter);
5461 else
5462 igb_update_ring_itr(q_vector);
5463 }
5464
5465 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5466 if (adapter->msix_entries)
5467 wr32(E1000_EIMS, q_vector->eims_value);
5468 else
5469 igb_irq_enable(adapter);
5470 }
5471 }
5472
5473 /**
5474 * igb_poll - NAPI Rx polling callback
5475 * @napi: napi polling structure
5476 * @budget: count of how many packets we should handle
5477 **/
5478 static int igb_poll(struct napi_struct *napi, int budget)
5479 {
5480 struct igb_q_vector *q_vector = container_of(napi,
5481 struct igb_q_vector,
5482 napi);
5483 int tx_clean_complete = 1, work_done = 0;
5484
5485 #ifdef CONFIG_IGB_DCA
5486 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5487 igb_update_dca(q_vector);
5488 #endif
5489 if (q_vector->tx_ring)
5490 tx_clean_complete = igb_clean_tx_irq(q_vector);
5491
5492 if (q_vector->rx_ring)
5493 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5494
5495 if (!tx_clean_complete)
5496 work_done = budget;
5497
5498 /* If not enough Rx work done, exit the polling mode */
5499 if (work_done < budget) {
5500 napi_complete(napi);
5501 igb_ring_irq_enable(q_vector);
5502 }
5503
5504 return work_done;
5505 }
5506
5507 /**
5508 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5509 * @adapter: board private structure
5510 * @shhwtstamps: timestamp structure to update
5511 * @regval: unsigned 64bit system time value.
5512 *
5513 * We need to convert the system time value stored in the RX/TXSTMP registers
5514 * into a hwtstamp which can be used by the upper level timestamping functions
5515 */
5516 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5517 struct skb_shared_hwtstamps *shhwtstamps,
5518 u64 regval)
5519 {
5520 u64 ns;
5521
5522 /*
5523 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5524 * 24 to match clock shift we setup earlier.
5525 */
5526 if (adapter->hw.mac.type == e1000_82580)
5527 regval <<= IGB_82580_TSYNC_SHIFT;
5528
5529 ns = timecounter_cyc2time(&adapter->clock, regval);
5530 timecompare_update(&adapter->compare, ns);
5531 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5532 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5533 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5534 }
5535
5536 /**
5537 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5538 * @q_vector: pointer to q_vector containing needed info
5539 * @buffer: pointer to igb_buffer structure
5540 *
5541 * If we were asked to do hardware stamping and such a time stamp is
5542 * available, then it must have been for this skb here because we only
5543 * allow only one such packet into the queue.
5544 */
5545 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5546 {
5547 struct igb_adapter *adapter = q_vector->adapter;
5548 struct e1000_hw *hw = &adapter->hw;
5549 struct skb_shared_hwtstamps shhwtstamps;
5550 u64 regval;
5551
5552 /* if skb does not support hw timestamp or TX stamp not valid exit */
5553 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5554 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5555 return;
5556
5557 regval = rd32(E1000_TXSTMPL);
5558 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5559
5560 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5561 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5562 }
5563
5564 /**
5565 * igb_clean_tx_irq - Reclaim resources after transmit completes
5566 * @q_vector: pointer to q_vector containing needed info
5567 * returns true if ring is completely cleaned
5568 **/
5569 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5570 {
5571 struct igb_adapter *adapter = q_vector->adapter;
5572 struct igb_ring *tx_ring = q_vector->tx_ring;
5573 struct net_device *netdev = tx_ring->netdev;
5574 struct e1000_hw *hw = &adapter->hw;
5575 struct igb_buffer *buffer_info;
5576 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5577 unsigned int total_bytes = 0, total_packets = 0;
5578 unsigned int i, eop, count = 0;
5579 bool cleaned = false;
5580
5581 i = tx_ring->next_to_clean;
5582 eop = tx_ring->buffer_info[i].next_to_watch;
5583 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5584
5585 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5586 (count < tx_ring->count)) {
5587 rmb(); /* read buffer_info after eop_desc status */
5588 for (cleaned = false; !cleaned; count++) {
5589 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5590 buffer_info = &tx_ring->buffer_info[i];
5591 cleaned = (i == eop);
5592
5593 if (buffer_info->skb) {
5594 total_bytes += buffer_info->bytecount;
5595 /* gso_segs is currently only valid for tcp */
5596 total_packets += buffer_info->gso_segs;
5597 igb_tx_hwtstamp(q_vector, buffer_info);
5598 }
5599
5600 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5601 tx_desc->wb.status = 0;
5602
5603 i++;
5604 if (i == tx_ring->count)
5605 i = 0;
5606 }
5607 eop = tx_ring->buffer_info[i].next_to_watch;
5608 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5609 }
5610
5611 tx_ring->next_to_clean = i;
5612
5613 if (unlikely(count &&
5614 netif_carrier_ok(netdev) &&
5615 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5616 /* Make sure that anybody stopping the queue after this
5617 * sees the new next_to_clean.
5618 */
5619 smp_mb();
5620 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5621 !(test_bit(__IGB_DOWN, &adapter->state))) {
5622 netif_wake_subqueue(netdev, tx_ring->queue_index);
5623
5624 u64_stats_update_begin(&tx_ring->tx_syncp);
5625 tx_ring->tx_stats.restart_queue++;
5626 u64_stats_update_end(&tx_ring->tx_syncp);
5627 }
5628 }
5629
5630 if (tx_ring->detect_tx_hung) {
5631 /* Detect a transmit hang in hardware, this serializes the
5632 * check with the clearing of time_stamp and movement of i */
5633 tx_ring->detect_tx_hung = false;
5634 if (tx_ring->buffer_info[i].time_stamp &&
5635 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5636 (adapter->tx_timeout_factor * HZ)) &&
5637 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5638
5639 /* detected Tx unit hang */
5640 dev_err(tx_ring->dev,
5641 "Detected Tx Unit Hang\n"
5642 " Tx Queue <%d>\n"
5643 " TDH <%x>\n"
5644 " TDT <%x>\n"
5645 " next_to_use <%x>\n"
5646 " next_to_clean <%x>\n"
5647 "buffer_info[next_to_clean]\n"
5648 " time_stamp <%lx>\n"
5649 " next_to_watch <%x>\n"
5650 " jiffies <%lx>\n"
5651 " desc.status <%x>\n",
5652 tx_ring->queue_index,
5653 readl(tx_ring->head),
5654 readl(tx_ring->tail),
5655 tx_ring->next_to_use,
5656 tx_ring->next_to_clean,
5657 tx_ring->buffer_info[eop].time_stamp,
5658 eop,
5659 jiffies,
5660 eop_desc->wb.status);
5661 netif_stop_subqueue(netdev, tx_ring->queue_index);
5662 }
5663 }
5664 tx_ring->total_bytes += total_bytes;
5665 tx_ring->total_packets += total_packets;
5666 u64_stats_update_begin(&tx_ring->tx_syncp);
5667 tx_ring->tx_stats.bytes += total_bytes;
5668 tx_ring->tx_stats.packets += total_packets;
5669 u64_stats_update_end(&tx_ring->tx_syncp);
5670 return count < tx_ring->count;
5671 }
5672
5673 /**
5674 * igb_receive_skb - helper function to handle rx indications
5675 * @q_vector: structure containing interrupt and ring information
5676 * @skb: packet to send up
5677 * @vlan_tag: vlan tag for packet
5678 **/
5679 static void igb_receive_skb(struct igb_q_vector *q_vector,
5680 struct sk_buff *skb,
5681 u16 vlan_tag)
5682 {
5683 struct igb_adapter *adapter = q_vector->adapter;
5684
5685 if (vlan_tag && adapter->vlgrp)
5686 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5687 vlan_tag, skb);
5688 else
5689 napi_gro_receive(&q_vector->napi, skb);
5690 }
5691
5692 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5693 u32 status_err, struct sk_buff *skb)
5694 {
5695 skb_checksum_none_assert(skb);
5696
5697 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5698 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5699 (status_err & E1000_RXD_STAT_IXSM))
5700 return;
5701
5702 /* TCP/UDP checksum error bit is set */
5703 if (status_err &
5704 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5705 /*
5706 * work around errata with sctp packets where the TCPE aka
5707 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5708 * packets, (aka let the stack check the crc32c)
5709 */
5710 if ((skb->len == 60) &&
5711 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5712 u64_stats_update_begin(&ring->rx_syncp);
5713 ring->rx_stats.csum_err++;
5714 u64_stats_update_end(&ring->rx_syncp);
5715 }
5716 /* let the stack verify checksum errors */
5717 return;
5718 }
5719 /* It must be a TCP or UDP packet with a valid checksum */
5720 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5721 skb->ip_summed = CHECKSUM_UNNECESSARY;
5722
5723 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5724 }
5725
5726 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5727 struct sk_buff *skb)
5728 {
5729 struct igb_adapter *adapter = q_vector->adapter;
5730 struct e1000_hw *hw = &adapter->hw;
5731 u64 regval;
5732
5733 /*
5734 * If this bit is set, then the RX registers contain the time stamp. No
5735 * other packet will be time stamped until we read these registers, so
5736 * read the registers to make them available again. Because only one
5737 * packet can be time stamped at a time, we know that the register
5738 * values must belong to this one here and therefore we don't need to
5739 * compare any of the additional attributes stored for it.
5740 *
5741 * If nothing went wrong, then it should have a shared tx_flags that we
5742 * can turn into a skb_shared_hwtstamps.
5743 */
5744 if (staterr & E1000_RXDADV_STAT_TSIP) {
5745 u32 *stamp = (u32 *)skb->data;
5746 regval = le32_to_cpu(*(stamp + 2));
5747 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5748 skb_pull(skb, IGB_TS_HDR_LEN);
5749 } else {
5750 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5751 return;
5752
5753 regval = rd32(E1000_RXSTMPL);
5754 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5755 }
5756
5757 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5758 }
5759 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5760 union e1000_adv_rx_desc *rx_desc)
5761 {
5762 /* HW will not DMA in data larger than the given buffer, even if it
5763 * parses the (NFS, of course) header to be larger. In that case, it
5764 * fills the header buffer and spills the rest into the page.
5765 */
5766 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5767 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5768 if (hlen > rx_ring->rx_buffer_len)
5769 hlen = rx_ring->rx_buffer_len;
5770 return hlen;
5771 }
5772
5773 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5774 int *work_done, int budget)
5775 {
5776 struct igb_ring *rx_ring = q_vector->rx_ring;
5777 struct net_device *netdev = rx_ring->netdev;
5778 struct device *dev = rx_ring->dev;
5779 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5780 struct igb_buffer *buffer_info , *next_buffer;
5781 struct sk_buff *skb;
5782 bool cleaned = false;
5783 int cleaned_count = 0;
5784 int current_node = numa_node_id();
5785 unsigned int total_bytes = 0, total_packets = 0;
5786 unsigned int i;
5787 u32 staterr;
5788 u16 length;
5789 u16 vlan_tag;
5790
5791 i = rx_ring->next_to_clean;
5792 buffer_info = &rx_ring->buffer_info[i];
5793 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5794 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5795
5796 while (staterr & E1000_RXD_STAT_DD) {
5797 if (*work_done >= budget)
5798 break;
5799 (*work_done)++;
5800 rmb(); /* read descriptor and rx_buffer_info after status DD */
5801
5802 skb = buffer_info->skb;
5803 prefetch(skb->data - NET_IP_ALIGN);
5804 buffer_info->skb = NULL;
5805
5806 i++;
5807 if (i == rx_ring->count)
5808 i = 0;
5809
5810 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5811 prefetch(next_rxd);
5812 next_buffer = &rx_ring->buffer_info[i];
5813
5814 length = le16_to_cpu(rx_desc->wb.upper.length);
5815 cleaned = true;
5816 cleaned_count++;
5817
5818 if (buffer_info->dma) {
5819 dma_unmap_single(dev, buffer_info->dma,
5820 rx_ring->rx_buffer_len,
5821 DMA_FROM_DEVICE);
5822 buffer_info->dma = 0;
5823 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5824 skb_put(skb, length);
5825 goto send_up;
5826 }
5827 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5828 }
5829
5830 if (length) {
5831 dma_unmap_page(dev, buffer_info->page_dma,
5832 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5833 buffer_info->page_dma = 0;
5834
5835 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5836 buffer_info->page,
5837 buffer_info->page_offset,
5838 length);
5839
5840 if ((page_count(buffer_info->page) != 1) ||
5841 (page_to_nid(buffer_info->page) != current_node))
5842 buffer_info->page = NULL;
5843 else
5844 get_page(buffer_info->page);
5845
5846 skb->len += length;
5847 skb->data_len += length;
5848 skb->truesize += length;
5849 }
5850
5851 if (!(staterr & E1000_RXD_STAT_EOP)) {
5852 buffer_info->skb = next_buffer->skb;
5853 buffer_info->dma = next_buffer->dma;
5854 next_buffer->skb = skb;
5855 next_buffer->dma = 0;
5856 goto next_desc;
5857 }
5858 send_up:
5859 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5860 dev_kfree_skb_irq(skb);
5861 goto next_desc;
5862 }
5863
5864 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5865 igb_rx_hwtstamp(q_vector, staterr, skb);
5866 total_bytes += skb->len;
5867 total_packets++;
5868
5869 igb_rx_checksum_adv(rx_ring, staterr, skb);
5870
5871 skb->protocol = eth_type_trans(skb, netdev);
5872 skb_record_rx_queue(skb, rx_ring->queue_index);
5873
5874 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5875 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5876
5877 igb_receive_skb(q_vector, skb, vlan_tag);
5878
5879 next_desc:
5880 rx_desc->wb.upper.status_error = 0;
5881
5882 /* return some buffers to hardware, one at a time is too slow */
5883 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5884 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5885 cleaned_count = 0;
5886 }
5887
5888 /* use prefetched values */
5889 rx_desc = next_rxd;
5890 buffer_info = next_buffer;
5891 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5892 }
5893
5894 rx_ring->next_to_clean = i;
5895 cleaned_count = igb_desc_unused(rx_ring);
5896
5897 if (cleaned_count)
5898 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5899
5900 rx_ring->total_packets += total_packets;
5901 rx_ring->total_bytes += total_bytes;
5902 u64_stats_update_begin(&rx_ring->rx_syncp);
5903 rx_ring->rx_stats.packets += total_packets;
5904 rx_ring->rx_stats.bytes += total_bytes;
5905 u64_stats_update_end(&rx_ring->rx_syncp);
5906 return cleaned;
5907 }
5908
5909 /**
5910 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5911 * @adapter: address of board private structure
5912 **/
5913 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5914 {
5915 struct net_device *netdev = rx_ring->netdev;
5916 union e1000_adv_rx_desc *rx_desc;
5917 struct igb_buffer *buffer_info;
5918 struct sk_buff *skb;
5919 unsigned int i;
5920 int bufsz;
5921
5922 i = rx_ring->next_to_use;
5923 buffer_info = &rx_ring->buffer_info[i];
5924
5925 bufsz = rx_ring->rx_buffer_len;
5926
5927 while (cleaned_count--) {
5928 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5929
5930 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5931 if (!buffer_info->page) {
5932 buffer_info->page = netdev_alloc_page(netdev);
5933 if (unlikely(!buffer_info->page)) {
5934 u64_stats_update_begin(&rx_ring->rx_syncp);
5935 rx_ring->rx_stats.alloc_failed++;
5936 u64_stats_update_end(&rx_ring->rx_syncp);
5937 goto no_buffers;
5938 }
5939 buffer_info->page_offset = 0;
5940 } else {
5941 buffer_info->page_offset ^= PAGE_SIZE / 2;
5942 }
5943 buffer_info->page_dma =
5944 dma_map_page(rx_ring->dev, buffer_info->page,
5945 buffer_info->page_offset,
5946 PAGE_SIZE / 2,
5947 DMA_FROM_DEVICE);
5948 if (dma_mapping_error(rx_ring->dev,
5949 buffer_info->page_dma)) {
5950 buffer_info->page_dma = 0;
5951 u64_stats_update_begin(&rx_ring->rx_syncp);
5952 rx_ring->rx_stats.alloc_failed++;
5953 u64_stats_update_end(&rx_ring->rx_syncp);
5954 goto no_buffers;
5955 }
5956 }
5957
5958 skb = buffer_info->skb;
5959 if (!skb) {
5960 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5961 if (unlikely(!skb)) {
5962 u64_stats_update_begin(&rx_ring->rx_syncp);
5963 rx_ring->rx_stats.alloc_failed++;
5964 u64_stats_update_end(&rx_ring->rx_syncp);
5965 goto no_buffers;
5966 }
5967
5968 buffer_info->skb = skb;
5969 }
5970 if (!buffer_info->dma) {
5971 buffer_info->dma = dma_map_single(rx_ring->dev,
5972 skb->data,
5973 bufsz,
5974 DMA_FROM_DEVICE);
5975 if (dma_mapping_error(rx_ring->dev,
5976 buffer_info->dma)) {
5977 buffer_info->dma = 0;
5978 u64_stats_update_begin(&rx_ring->rx_syncp);
5979 rx_ring->rx_stats.alloc_failed++;
5980 u64_stats_update_end(&rx_ring->rx_syncp);
5981 goto no_buffers;
5982 }
5983 }
5984 /* Refresh the desc even if buffer_addrs didn't change because
5985 * each write-back erases this info. */
5986 if (bufsz < IGB_RXBUFFER_1024) {
5987 rx_desc->read.pkt_addr =
5988 cpu_to_le64(buffer_info->page_dma);
5989 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5990 } else {
5991 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5992 rx_desc->read.hdr_addr = 0;
5993 }
5994
5995 i++;
5996 if (i == rx_ring->count)
5997 i = 0;
5998 buffer_info = &rx_ring->buffer_info[i];
5999 }
6000
6001 no_buffers:
6002 if (rx_ring->next_to_use != i) {
6003 rx_ring->next_to_use = i;
6004 if (i == 0)
6005 i = (rx_ring->count - 1);
6006 else
6007 i--;
6008
6009 /* Force memory writes to complete before letting h/w
6010 * know there are new descriptors to fetch. (Only
6011 * applicable for weak-ordered memory model archs,
6012 * such as IA-64). */
6013 wmb();
6014 writel(i, rx_ring->tail);
6015 }
6016 }
6017
6018 /**
6019 * igb_mii_ioctl -
6020 * @netdev:
6021 * @ifreq:
6022 * @cmd:
6023 **/
6024 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6025 {
6026 struct igb_adapter *adapter = netdev_priv(netdev);
6027 struct mii_ioctl_data *data = if_mii(ifr);
6028
6029 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6030 return -EOPNOTSUPP;
6031
6032 switch (cmd) {
6033 case SIOCGMIIPHY:
6034 data->phy_id = adapter->hw.phy.addr;
6035 break;
6036 case SIOCGMIIREG:
6037 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6038 &data->val_out))
6039 return -EIO;
6040 break;
6041 case SIOCSMIIREG:
6042 default:
6043 return -EOPNOTSUPP;
6044 }
6045 return 0;
6046 }
6047
6048 /**
6049 * igb_hwtstamp_ioctl - control hardware time stamping
6050 * @netdev:
6051 * @ifreq:
6052 * @cmd:
6053 *
6054 * Outgoing time stamping can be enabled and disabled. Play nice and
6055 * disable it when requested, although it shouldn't cause any overhead
6056 * when no packet needs it. At most one packet in the queue may be
6057 * marked for time stamping, otherwise it would be impossible to tell
6058 * for sure to which packet the hardware time stamp belongs.
6059 *
6060 * Incoming time stamping has to be configured via the hardware
6061 * filters. Not all combinations are supported, in particular event
6062 * type has to be specified. Matching the kind of event packet is
6063 * not supported, with the exception of "all V2 events regardless of
6064 * level 2 or 4".
6065 *
6066 **/
6067 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6068 struct ifreq *ifr, int cmd)
6069 {
6070 struct igb_adapter *adapter = netdev_priv(netdev);
6071 struct e1000_hw *hw = &adapter->hw;
6072 struct hwtstamp_config config;
6073 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6074 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6075 u32 tsync_rx_cfg = 0;
6076 bool is_l4 = false;
6077 bool is_l2 = false;
6078 u32 regval;
6079
6080 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6081 return -EFAULT;
6082
6083 /* reserved for future extensions */
6084 if (config.flags)
6085 return -EINVAL;
6086
6087 switch (config.tx_type) {
6088 case HWTSTAMP_TX_OFF:
6089 tsync_tx_ctl = 0;
6090 case HWTSTAMP_TX_ON:
6091 break;
6092 default:
6093 return -ERANGE;
6094 }
6095
6096 switch (config.rx_filter) {
6097 case HWTSTAMP_FILTER_NONE:
6098 tsync_rx_ctl = 0;
6099 break;
6100 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6101 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6102 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6103 case HWTSTAMP_FILTER_ALL:
6104 /*
6105 * register TSYNCRXCFG must be set, therefore it is not
6106 * possible to time stamp both Sync and Delay_Req messages
6107 * => fall back to time stamping all packets
6108 */
6109 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6110 config.rx_filter = HWTSTAMP_FILTER_ALL;
6111 break;
6112 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6113 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6114 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6115 is_l4 = true;
6116 break;
6117 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6118 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6119 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6120 is_l4 = true;
6121 break;
6122 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6123 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6124 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6125 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6126 is_l2 = true;
6127 is_l4 = true;
6128 config.rx_filter = HWTSTAMP_FILTER_SOME;
6129 break;
6130 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6131 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6132 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6133 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6134 is_l2 = true;
6135 is_l4 = true;
6136 config.rx_filter = HWTSTAMP_FILTER_SOME;
6137 break;
6138 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6139 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6140 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6141 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6142 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6143 is_l2 = true;
6144 break;
6145 default:
6146 return -ERANGE;
6147 }
6148
6149 if (hw->mac.type == e1000_82575) {
6150 if (tsync_rx_ctl | tsync_tx_ctl)
6151 return -EINVAL;
6152 return 0;
6153 }
6154
6155 /*
6156 * Per-packet timestamping only works if all packets are
6157 * timestamped, so enable timestamping in all packets as
6158 * long as one rx filter was configured.
6159 */
6160 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6161 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6162 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6163 }
6164
6165 /* enable/disable TX */
6166 regval = rd32(E1000_TSYNCTXCTL);
6167 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6168 regval |= tsync_tx_ctl;
6169 wr32(E1000_TSYNCTXCTL, regval);
6170
6171 /* enable/disable RX */
6172 regval = rd32(E1000_TSYNCRXCTL);
6173 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6174 regval |= tsync_rx_ctl;
6175 wr32(E1000_TSYNCRXCTL, regval);
6176
6177 /* define which PTP packets are time stamped */
6178 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6179
6180 /* define ethertype filter for timestamped packets */
6181 if (is_l2)
6182 wr32(E1000_ETQF(3),
6183 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6184 E1000_ETQF_1588 | /* enable timestamping */
6185 ETH_P_1588)); /* 1588 eth protocol type */
6186 else
6187 wr32(E1000_ETQF(3), 0);
6188
6189 #define PTP_PORT 319
6190 /* L4 Queue Filter[3]: filter by destination port and protocol */
6191 if (is_l4) {
6192 u32 ftqf = (IPPROTO_UDP /* UDP */
6193 | E1000_FTQF_VF_BP /* VF not compared */
6194 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6195 | E1000_FTQF_MASK); /* mask all inputs */
6196 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6197
6198 wr32(E1000_IMIR(3), htons(PTP_PORT));
6199 wr32(E1000_IMIREXT(3),
6200 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6201 if (hw->mac.type == e1000_82576) {
6202 /* enable source port check */
6203 wr32(E1000_SPQF(3), htons(PTP_PORT));
6204 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6205 }
6206 wr32(E1000_FTQF(3), ftqf);
6207 } else {
6208 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6209 }
6210 wrfl();
6211
6212 adapter->hwtstamp_config = config;
6213
6214 /* clear TX/RX time stamp registers, just to be sure */
6215 regval = rd32(E1000_TXSTMPH);
6216 regval = rd32(E1000_RXSTMPH);
6217
6218 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6219 -EFAULT : 0;
6220 }
6221
6222 /**
6223 * igb_ioctl -
6224 * @netdev:
6225 * @ifreq:
6226 * @cmd:
6227 **/
6228 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6229 {
6230 switch (cmd) {
6231 case SIOCGMIIPHY:
6232 case SIOCGMIIREG:
6233 case SIOCSMIIREG:
6234 return igb_mii_ioctl(netdev, ifr, cmd);
6235 case SIOCSHWTSTAMP:
6236 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6237 default:
6238 return -EOPNOTSUPP;
6239 }
6240 }
6241
6242 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6243 {
6244 struct igb_adapter *adapter = hw->back;
6245 u16 cap_offset;
6246
6247 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6248 if (!cap_offset)
6249 return -E1000_ERR_CONFIG;
6250
6251 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6252
6253 return 0;
6254 }
6255
6256 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6257 {
6258 struct igb_adapter *adapter = hw->back;
6259 u16 cap_offset;
6260
6261 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6262 if (!cap_offset)
6263 return -E1000_ERR_CONFIG;
6264
6265 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6266
6267 return 0;
6268 }
6269
6270 static void igb_vlan_rx_register(struct net_device *netdev,
6271 struct vlan_group *grp)
6272 {
6273 struct igb_adapter *adapter = netdev_priv(netdev);
6274 struct e1000_hw *hw = &adapter->hw;
6275 u32 ctrl, rctl;
6276
6277 igb_irq_disable(adapter);
6278 adapter->vlgrp = grp;
6279
6280 if (grp) {
6281 /* enable VLAN tag insert/strip */
6282 ctrl = rd32(E1000_CTRL);
6283 ctrl |= E1000_CTRL_VME;
6284 wr32(E1000_CTRL, ctrl);
6285
6286 /* Disable CFI check */
6287 rctl = rd32(E1000_RCTL);
6288 rctl &= ~E1000_RCTL_CFIEN;
6289 wr32(E1000_RCTL, rctl);
6290 } else {
6291 /* disable VLAN tag insert/strip */
6292 ctrl = rd32(E1000_CTRL);
6293 ctrl &= ~E1000_CTRL_VME;
6294 wr32(E1000_CTRL, ctrl);
6295 }
6296
6297 igb_rlpml_set(adapter);
6298
6299 if (!test_bit(__IGB_DOWN, &adapter->state))
6300 igb_irq_enable(adapter);
6301 }
6302
6303 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6304 {
6305 struct igb_adapter *adapter = netdev_priv(netdev);
6306 struct e1000_hw *hw = &adapter->hw;
6307 int pf_id = adapter->vfs_allocated_count;
6308
6309 /* attempt to add filter to vlvf array */
6310 igb_vlvf_set(adapter, vid, true, pf_id);
6311
6312 /* add the filter since PF can receive vlans w/o entry in vlvf */
6313 igb_vfta_set(hw, vid, true);
6314 }
6315
6316 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6317 {
6318 struct igb_adapter *adapter = netdev_priv(netdev);
6319 struct e1000_hw *hw = &adapter->hw;
6320 int pf_id = adapter->vfs_allocated_count;
6321 s32 err;
6322
6323 igb_irq_disable(adapter);
6324 vlan_group_set_device(adapter->vlgrp, vid, NULL);
6325
6326 if (!test_bit(__IGB_DOWN, &adapter->state))
6327 igb_irq_enable(adapter);
6328
6329 /* remove vlan from VLVF table array */
6330 err = igb_vlvf_set(adapter, vid, false, pf_id);
6331
6332 /* if vid was not present in VLVF just remove it from table */
6333 if (err)
6334 igb_vfta_set(hw, vid, false);
6335 }
6336
6337 static void igb_restore_vlan(struct igb_adapter *adapter)
6338 {
6339 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6340
6341 if (adapter->vlgrp) {
6342 u16 vid;
6343 for (vid = 0; vid < VLAN_N_VID; vid++) {
6344 if (!vlan_group_get_device(adapter->vlgrp, vid))
6345 continue;
6346 igb_vlan_rx_add_vid(adapter->netdev, vid);
6347 }
6348 }
6349 }
6350
6351 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6352 {
6353 struct pci_dev *pdev = adapter->pdev;
6354 struct e1000_mac_info *mac = &adapter->hw.mac;
6355
6356 mac->autoneg = 0;
6357
6358 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6359 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6360 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6361 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6362 return -EINVAL;
6363 }
6364
6365 switch (spddplx) {
6366 case SPEED_10 + DUPLEX_HALF:
6367 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6368 break;
6369 case SPEED_10 + DUPLEX_FULL:
6370 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6371 break;
6372 case SPEED_100 + DUPLEX_HALF:
6373 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6374 break;
6375 case SPEED_100 + DUPLEX_FULL:
6376 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6377 break;
6378 case SPEED_1000 + DUPLEX_FULL:
6379 mac->autoneg = 1;
6380 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6381 break;
6382 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6383 default:
6384 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6385 return -EINVAL;
6386 }
6387 return 0;
6388 }
6389
6390 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6391 {
6392 struct net_device *netdev = pci_get_drvdata(pdev);
6393 struct igb_adapter *adapter = netdev_priv(netdev);
6394 struct e1000_hw *hw = &adapter->hw;
6395 u32 ctrl, rctl, status;
6396 u32 wufc = adapter->wol;
6397 #ifdef CONFIG_PM
6398 int retval = 0;
6399 #endif
6400
6401 netif_device_detach(netdev);
6402
6403 if (netif_running(netdev))
6404 igb_close(netdev);
6405
6406 igb_clear_interrupt_scheme(adapter);
6407
6408 #ifdef CONFIG_PM
6409 retval = pci_save_state(pdev);
6410 if (retval)
6411 return retval;
6412 #endif
6413
6414 status = rd32(E1000_STATUS);
6415 if (status & E1000_STATUS_LU)
6416 wufc &= ~E1000_WUFC_LNKC;
6417
6418 if (wufc) {
6419 igb_setup_rctl(adapter);
6420 igb_set_rx_mode(netdev);
6421
6422 /* turn on all-multi mode if wake on multicast is enabled */
6423 if (wufc & E1000_WUFC_MC) {
6424 rctl = rd32(E1000_RCTL);
6425 rctl |= E1000_RCTL_MPE;
6426 wr32(E1000_RCTL, rctl);
6427 }
6428
6429 ctrl = rd32(E1000_CTRL);
6430 /* advertise wake from D3Cold */
6431 #define E1000_CTRL_ADVD3WUC 0x00100000
6432 /* phy power management enable */
6433 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6434 ctrl |= E1000_CTRL_ADVD3WUC;
6435 wr32(E1000_CTRL, ctrl);
6436
6437 /* Allow time for pending master requests to run */
6438 igb_disable_pcie_master(hw);
6439
6440 wr32(E1000_WUC, E1000_WUC_PME_EN);
6441 wr32(E1000_WUFC, wufc);
6442 } else {
6443 wr32(E1000_WUC, 0);
6444 wr32(E1000_WUFC, 0);
6445 }
6446
6447 *enable_wake = wufc || adapter->en_mng_pt;
6448 if (!*enable_wake)
6449 igb_power_down_link(adapter);
6450 else
6451 igb_power_up_link(adapter);
6452
6453 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6454 * would have already happened in close and is redundant. */
6455 igb_release_hw_control(adapter);
6456
6457 pci_disable_device(pdev);
6458
6459 return 0;
6460 }
6461
6462 #ifdef CONFIG_PM
6463 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6464 {
6465 int retval;
6466 bool wake;
6467
6468 retval = __igb_shutdown(pdev, &wake);
6469 if (retval)
6470 return retval;
6471
6472 if (wake) {
6473 pci_prepare_to_sleep(pdev);
6474 } else {
6475 pci_wake_from_d3(pdev, false);
6476 pci_set_power_state(pdev, PCI_D3hot);
6477 }
6478
6479 return 0;
6480 }
6481
6482 static int igb_resume(struct pci_dev *pdev)
6483 {
6484 struct net_device *netdev = pci_get_drvdata(pdev);
6485 struct igb_adapter *adapter = netdev_priv(netdev);
6486 struct e1000_hw *hw = &adapter->hw;
6487 u32 err;
6488
6489 pci_set_power_state(pdev, PCI_D0);
6490 pci_restore_state(pdev);
6491 pci_save_state(pdev);
6492
6493 err = pci_enable_device_mem(pdev);
6494 if (err) {
6495 dev_err(&pdev->dev,
6496 "igb: Cannot enable PCI device from suspend\n");
6497 return err;
6498 }
6499 pci_set_master(pdev);
6500
6501 pci_enable_wake(pdev, PCI_D3hot, 0);
6502 pci_enable_wake(pdev, PCI_D3cold, 0);
6503
6504 if (igb_init_interrupt_scheme(adapter)) {
6505 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6506 return -ENOMEM;
6507 }
6508
6509 igb_reset(adapter);
6510
6511 /* let the f/w know that the h/w is now under the control of the
6512 * driver. */
6513 igb_get_hw_control(adapter);
6514
6515 wr32(E1000_WUS, ~0);
6516
6517 if (netif_running(netdev)) {
6518 err = igb_open(netdev);
6519 if (err)
6520 return err;
6521 }
6522
6523 netif_device_attach(netdev);
6524
6525 return 0;
6526 }
6527 #endif
6528
6529 static void igb_shutdown(struct pci_dev *pdev)
6530 {
6531 bool wake;
6532
6533 __igb_shutdown(pdev, &wake);
6534
6535 if (system_state == SYSTEM_POWER_OFF) {
6536 pci_wake_from_d3(pdev, wake);
6537 pci_set_power_state(pdev, PCI_D3hot);
6538 }
6539 }
6540
6541 #ifdef CONFIG_NET_POLL_CONTROLLER
6542 /*
6543 * Polling 'interrupt' - used by things like netconsole to send skbs
6544 * without having to re-enable interrupts. It's not called while
6545 * the interrupt routine is executing.
6546 */
6547 static void igb_netpoll(struct net_device *netdev)
6548 {
6549 struct igb_adapter *adapter = netdev_priv(netdev);
6550 struct e1000_hw *hw = &adapter->hw;
6551 int i;
6552
6553 if (!adapter->msix_entries) {
6554 struct igb_q_vector *q_vector = adapter->q_vector[0];
6555 igb_irq_disable(adapter);
6556 napi_schedule(&q_vector->napi);
6557 return;
6558 }
6559
6560 for (i = 0; i < adapter->num_q_vectors; i++) {
6561 struct igb_q_vector *q_vector = adapter->q_vector[i];
6562 wr32(E1000_EIMC, q_vector->eims_value);
6563 napi_schedule(&q_vector->napi);
6564 }
6565 }
6566 #endif /* CONFIG_NET_POLL_CONTROLLER */
6567
6568 /**
6569 * igb_io_error_detected - called when PCI error is detected
6570 * @pdev: Pointer to PCI device
6571 * @state: The current pci connection state
6572 *
6573 * This function is called after a PCI bus error affecting
6574 * this device has been detected.
6575 */
6576 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6577 pci_channel_state_t state)
6578 {
6579 struct net_device *netdev = pci_get_drvdata(pdev);
6580 struct igb_adapter *adapter = netdev_priv(netdev);
6581
6582 netif_device_detach(netdev);
6583
6584 if (state == pci_channel_io_perm_failure)
6585 return PCI_ERS_RESULT_DISCONNECT;
6586
6587 if (netif_running(netdev))
6588 igb_down(adapter);
6589 pci_disable_device(pdev);
6590
6591 /* Request a slot slot reset. */
6592 return PCI_ERS_RESULT_NEED_RESET;
6593 }
6594
6595 /**
6596 * igb_io_slot_reset - called after the pci bus has been reset.
6597 * @pdev: Pointer to PCI device
6598 *
6599 * Restart the card from scratch, as if from a cold-boot. Implementation
6600 * resembles the first-half of the igb_resume routine.
6601 */
6602 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6603 {
6604 struct net_device *netdev = pci_get_drvdata(pdev);
6605 struct igb_adapter *adapter = netdev_priv(netdev);
6606 struct e1000_hw *hw = &adapter->hw;
6607 pci_ers_result_t result;
6608 int err;
6609
6610 if (pci_enable_device_mem(pdev)) {
6611 dev_err(&pdev->dev,
6612 "Cannot re-enable PCI device after reset.\n");
6613 result = PCI_ERS_RESULT_DISCONNECT;
6614 } else {
6615 pci_set_master(pdev);
6616 pci_restore_state(pdev);
6617 pci_save_state(pdev);
6618
6619 pci_enable_wake(pdev, PCI_D3hot, 0);
6620 pci_enable_wake(pdev, PCI_D3cold, 0);
6621
6622 igb_reset(adapter);
6623 wr32(E1000_WUS, ~0);
6624 result = PCI_ERS_RESULT_RECOVERED;
6625 }
6626
6627 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6628 if (err) {
6629 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6630 "failed 0x%0x\n", err);
6631 /* non-fatal, continue */
6632 }
6633
6634 return result;
6635 }
6636
6637 /**
6638 * igb_io_resume - called when traffic can start flowing again.
6639 * @pdev: Pointer to PCI device
6640 *
6641 * This callback is called when the error recovery driver tells us that
6642 * its OK to resume normal operation. Implementation resembles the
6643 * second-half of the igb_resume routine.
6644 */
6645 static void igb_io_resume(struct pci_dev *pdev)
6646 {
6647 struct net_device *netdev = pci_get_drvdata(pdev);
6648 struct igb_adapter *adapter = netdev_priv(netdev);
6649
6650 if (netif_running(netdev)) {
6651 if (igb_up(adapter)) {
6652 dev_err(&pdev->dev, "igb_up failed after reset\n");
6653 return;
6654 }
6655 }
6656
6657 netif_device_attach(netdev);
6658
6659 /* let the f/w know that the h/w is now under the control of the
6660 * driver. */
6661 igb_get_hw_control(adapter);
6662 }
6663
6664 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6665 u8 qsel)
6666 {
6667 u32 rar_low, rar_high;
6668 struct e1000_hw *hw = &adapter->hw;
6669
6670 /* HW expects these in little endian so we reverse the byte order
6671 * from network order (big endian) to little endian
6672 */
6673 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6674 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6675 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6676
6677 /* Indicate to hardware the Address is Valid. */
6678 rar_high |= E1000_RAH_AV;
6679
6680 if (hw->mac.type == e1000_82575)
6681 rar_high |= E1000_RAH_POOL_1 * qsel;
6682 else
6683 rar_high |= E1000_RAH_POOL_1 << qsel;
6684
6685 wr32(E1000_RAL(index), rar_low);
6686 wrfl();
6687 wr32(E1000_RAH(index), rar_high);
6688 wrfl();
6689 }
6690
6691 static int igb_set_vf_mac(struct igb_adapter *adapter,
6692 int vf, unsigned char *mac_addr)
6693 {
6694 struct e1000_hw *hw = &adapter->hw;
6695 /* VF MAC addresses start at end of receive addresses and moves
6696 * torwards the first, as a result a collision should not be possible */
6697 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6698
6699 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6700
6701 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6702
6703 return 0;
6704 }
6705
6706 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6707 {
6708 struct igb_adapter *adapter = netdev_priv(netdev);
6709 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6710 return -EINVAL;
6711 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6712 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6713 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6714 " change effective.");
6715 if (test_bit(__IGB_DOWN, &adapter->state)) {
6716 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6717 " but the PF device is not up.\n");
6718 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6719 " attempting to use the VF device.\n");
6720 }
6721 return igb_set_vf_mac(adapter, vf, mac);
6722 }
6723
/*
 * Map a SPEED_* link speed to its rate in Mbps.  Only SPEED_100 and
 * SPEED_1000 are recognised; anything else yields 0.
 */
static int igb_link_mbps(int internal_link_speed)
{
	if (internal_link_speed == SPEED_1000)
		return 1000;

	if (internal_link_speed == SPEED_100)
		return 100;

	return 0;
}
6735
6736 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6737 int link_speed)
6738 {
6739 int rf_dec, rf_int;
6740 u32 bcnrc_val;
6741
6742 if (tx_rate != 0) {
6743 /* Calculate the rate factor values to set */
6744 rf_int = link_speed / tx_rate;
6745 rf_dec = (link_speed - (rf_int * tx_rate));
6746 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6747
6748 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6749 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6750 E1000_RTTBCNRC_RF_INT_MASK);
6751 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6752 } else {
6753 bcnrc_val = 0;
6754 }
6755
6756 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6757 wr32(E1000_RTTBCNRC, bcnrc_val);
6758 }
6759
6760 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6761 {
6762 int actual_link_speed, i;
6763 bool reset_rate = false;
6764
6765 /* VF TX rate limit was not set or not supported */
6766 if ((adapter->vf_rate_link_speed == 0) ||
6767 (adapter->hw.mac.type != e1000_82576))
6768 return;
6769
6770 actual_link_speed = igb_link_mbps(adapter->link_speed);
6771 if (actual_link_speed != adapter->vf_rate_link_speed) {
6772 reset_rate = true;
6773 adapter->vf_rate_link_speed = 0;
6774 dev_info(&adapter->pdev->dev,
6775 "Link speed has been changed. VF Transmit "
6776 "rate is disabled\n");
6777 }
6778
6779 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6780 if (reset_rate)
6781 adapter->vf_data[i].tx_rate = 0;
6782
6783 igb_set_vf_rate_limit(&adapter->hw, i,
6784 adapter->vf_data[i].tx_rate,
6785 actual_link_speed);
6786 }
6787 }
6788
6789 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6790 {
6791 struct igb_adapter *adapter = netdev_priv(netdev);
6792 struct e1000_hw *hw = &adapter->hw;
6793 int actual_link_speed;
6794
6795 if (hw->mac.type != e1000_82576)
6796 return -EOPNOTSUPP;
6797
6798 actual_link_speed = igb_link_mbps(adapter->link_speed);
6799 if ((vf >= adapter->vfs_allocated_count) ||
6800 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6801 (tx_rate < 0) || (tx_rate > actual_link_speed))
6802 return -EINVAL;
6803
6804 adapter->vf_rate_link_speed = actual_link_speed;
6805 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6806 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6807
6808 return 0;
6809 }
6810
6811 static int igb_ndo_get_vf_config(struct net_device *netdev,
6812 int vf, struct ifla_vf_info *ivi)
6813 {
6814 struct igb_adapter *adapter = netdev_priv(netdev);
6815 if (vf >= adapter->vfs_allocated_count)
6816 return -EINVAL;
6817 ivi->vf = vf;
6818 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6819 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6820 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6821 ivi->qos = adapter->vf_data[vf].pf_qos;
6822 return 0;
6823 }
6824
6825 static void igb_vmm_control(struct igb_adapter *adapter)
6826 {
6827 struct e1000_hw *hw = &adapter->hw;
6828 u32 reg;
6829
6830 switch (hw->mac.type) {
6831 case e1000_82575:
6832 default:
6833 /* replication is not supported for 82575 */
6834 return;
6835 case e1000_82576:
6836 /* notify HW that the MAC is adding vlan tags */
6837 reg = rd32(E1000_DTXCTL);
6838 reg |= E1000_DTXCTL_VLAN_ADDED;
6839 wr32(E1000_DTXCTL, reg);
6840 case e1000_82580:
6841 /* enable replication vlan tag stripping */
6842 reg = rd32(E1000_RPLOLR);
6843 reg |= E1000_RPLOLR_STRVLAN;
6844 wr32(E1000_RPLOLR, reg);
6845 case e1000_i350:
6846 /* none of the above registers are supported by i350 */
6847 break;
6848 }
6849
6850 if (adapter->vfs_allocated_count) {
6851 igb_vmdq_set_loopback_pf(hw, true);
6852 igb_vmdq_set_replication_pf(hw, true);
6853 igb_vmdq_set_anti_spoofing_pf(hw, true,
6854 adapter->vfs_allocated_count);
6855 } else {
6856 igb_vmdq_set_loopback_pf(hw, false);
6857 igb_vmdq_set_replication_pf(hw, false);
6858 }
6859 }
6860
6861 /* igb_main.c */