1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
48 #include <linux/ip.h>
49 #include <linux/tcp.h>
50 #include <linux/sctp.h>
51 #include <linux/if_ether.h>
52 #include <linux/aer.h>
53 #include <linux/prefetch.h>
54 #ifdef CONFIG_IGB_DCA
55 #include <linux/dca.h>
56 #endif
57 #include "igb.h"
58
59 #define MAJ 3
60 #define MIN 0
61 #define BUILD 6
62 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63 __stringify(BUILD) "-k"
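/* i.e. with the MAJ/MIN/BUILD values above, DRV_VERSION expands to "3.0.6-k" */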
64 char igb_driver_name[] = "igb";
65 char igb_driver_version[] = DRV_VERSION;
66 static const char igb_driver_string[] =
67 "Intel(R) Gigabit Ethernet Network Driver";
68 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70 static const struct e1000_info *igb_info_tbl[] = {
71 [board_82575] = &e1000_82575_info,
72 };
73
74 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100 /* required last entry */
101 {0, }
102 };
103
104 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106 void igb_reset(struct igb_adapter *);
107 static int igb_setup_all_tx_resources(struct igb_adapter *);
108 static int igb_setup_all_rx_resources(struct igb_adapter *);
109 static void igb_free_all_tx_resources(struct igb_adapter *);
110 static void igb_free_all_rx_resources(struct igb_adapter *);
111 static void igb_setup_mrqc(struct igb_adapter *);
112 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113 static void __devexit igb_remove(struct pci_dev *pdev);
114 static void igb_init_hw_timer(struct igb_adapter *adapter);
115 static int igb_sw_init(struct igb_adapter *);
116 static int igb_open(struct net_device *);
117 static int igb_close(struct net_device *);
118 static void igb_configure_tx(struct igb_adapter *);
119 static void igb_configure_rx(struct igb_adapter *);
120 static void igb_clean_all_tx_rings(struct igb_adapter *);
121 static void igb_clean_all_rx_rings(struct igb_adapter *);
122 static void igb_clean_tx_ring(struct igb_ring *);
123 static void igb_clean_rx_ring(struct igb_ring *);
124 static void igb_set_rx_mode(struct net_device *);
125 static void igb_update_phy_info(unsigned long);
126 static void igb_watchdog(unsigned long);
127 static void igb_watchdog_task(struct work_struct *);
128 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130 struct rtnl_link_stats64 *stats);
131 static int igb_change_mtu(struct net_device *, int);
132 static int igb_set_mac(struct net_device *, void *);
133 static void igb_set_uta(struct igb_adapter *adapter);
134 static irqreturn_t igb_intr(int irq, void *);
135 static irqreturn_t igb_intr_msi(int irq, void *);
136 static irqreturn_t igb_msix_other(int irq, void *);
137 static irqreturn_t igb_msix_ring(int irq, void *);
138 #ifdef CONFIG_IGB_DCA
139 static void igb_update_dca(struct igb_q_vector *);
140 static void igb_setup_dca(struct igb_adapter *);
141 #endif /* CONFIG_IGB_DCA */
142 static int igb_poll(struct napi_struct *, int);
143 static bool igb_clean_tx_irq(struct igb_q_vector *);
144 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146 static void igb_tx_timeout(struct net_device *);
147 static void igb_reset_task(struct work_struct *);
148 static void igb_vlan_mode(struct net_device *netdev, u32 features);
149 static void igb_vlan_rx_add_vid(struct net_device *, u16);
150 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151 static void igb_restore_vlan(struct igb_adapter *);
152 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
153 static void igb_ping_all_vfs(struct igb_adapter *);
154 static void igb_msg_task(struct igb_adapter *);
155 static void igb_vmm_control(struct igb_adapter *);
156 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160 int vf, u16 vlan, u8 qos);
161 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163 struct ifla_vf_info *ivi);
164 static void igb_check_vf_rate_limit(struct igb_adapter *);
165
166 #ifdef CONFIG_PM
167 static int igb_suspend(struct pci_dev *, pm_message_t);
168 static int igb_resume(struct pci_dev *);
169 #endif
170 static void igb_shutdown(struct pci_dev *);
171 #ifdef CONFIG_IGB_DCA
172 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
173 static struct notifier_block dca_notifier = {
174 .notifier_call = igb_notify_dca,
175 .next = NULL,
176 .priority = 0
177 };
178 #endif
179 #ifdef CONFIG_NET_POLL_CONTROLLER
180 /* for netdump / net console */
181 static void igb_netpoll(struct net_device *);
182 #endif
183 #ifdef CONFIG_PCI_IOV
184 static unsigned int max_vfs = 0;
185 module_param(max_vfs, uint, 0);
186 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
187 "per physical function");
188 #endif /* CONFIG_PCI_IOV */
189
190 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191 pci_channel_state_t);
192 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193 static void igb_io_resume(struct pci_dev *);
194
195 static struct pci_error_handlers igb_err_handler = {
196 .error_detected = igb_io_error_detected,
197 .slot_reset = igb_io_slot_reset,
198 .resume = igb_io_resume,
199 };
200
201
202 static struct pci_driver igb_driver = {
203 .name = igb_driver_name,
204 .id_table = igb_pci_tbl,
205 .probe = igb_probe,
206 .remove = __devexit_p(igb_remove),
207 #ifdef CONFIG_PM
208 /* Power Management Hooks */
209 .suspend = igb_suspend,
210 .resume = igb_resume,
211 #endif
212 .shutdown = igb_shutdown,
213 .err_handler = &igb_err_handler
214 };
215
216 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218 MODULE_LICENSE("GPL");
219 MODULE_VERSION(DRV_VERSION);
220
221 struct igb_reg_info {
222 u32 ofs;
223 char *name;
224 };
225
226 static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228 /* General Registers */
229 {E1000_CTRL, "CTRL"},
230 {E1000_STATUS, "STATUS"},
231 {E1000_CTRL_EXT, "CTRL_EXT"},
232
233 /* Interrupt Registers */
234 {E1000_ICR, "ICR"},
235
236 /* RX Registers */
237 {E1000_RCTL, "RCTL"},
238 {E1000_RDLEN(0), "RDLEN"},
239 {E1000_RDH(0), "RDH"},
240 {E1000_RDT(0), "RDT"},
241 {E1000_RXDCTL(0), "RXDCTL"},
242 {E1000_RDBAL(0), "RDBAL"},
243 {E1000_RDBAH(0), "RDBAH"},
244
245 /* TX Registers */
246 {E1000_TCTL, "TCTL"},
247 {E1000_TDBAL(0), "TDBAL"},
248 {E1000_TDBAH(0), "TDBAH"},
249 {E1000_TDLEN(0), "TDLEN"},
250 {E1000_TDH(0), "TDH"},
251 {E1000_TDT(0), "TDT"},
252 {E1000_TXDCTL(0), "TXDCTL"},
253 {E1000_TDFH, "TDFH"},
254 {E1000_TDFT, "TDFT"},
255 {E1000_TDFHS, "TDFHS"},
256 {E1000_TDFPC, "TDFPC"},
257
258 /* List Terminator */
259 {}
260 };
261
262 /*
263 * igb_regdump - register printout routine
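 * For the per-queue registers the first four queue instances are read
 * and printed on a single line as NAME[0-3].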
264 */
265 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266 {
267 int n = 0;
268 char rname[16];
269 u32 regs[8];
270
271 switch (reginfo->ofs) {
272 case E1000_RDLEN(0):
273 for (n = 0; n < 4; n++)
274 regs[n] = rd32(E1000_RDLEN(n));
275 break;
276 case E1000_RDH(0):
277 for (n = 0; n < 4; n++)
278 regs[n] = rd32(E1000_RDH(n));
279 break;
280 case E1000_RDT(0):
281 for (n = 0; n < 4; n++)
282 regs[n] = rd32(E1000_RDT(n));
283 break;
284 case E1000_RXDCTL(0):
285 for (n = 0; n < 4; n++)
286 regs[n] = rd32(E1000_RXDCTL(n));
287 break;
288 case E1000_RDBAL(0):
289 for (n = 0; n < 4; n++)
290 regs[n] = rd32(E1000_RDBAL(n));
291 break;
292 case E1000_RDBAH(0):
293 for (n = 0; n < 4; n++)
294 regs[n] = rd32(E1000_RDBAH(n));
295 break;
296 case E1000_TDBAL(0):
297 for (n = 0; n < 4; n++)
298 regs[n] = rd32(E1000_TDBAL(n));
299 break;
300 case E1000_TDBAH(0):
301 for (n = 0; n < 4; n++)
302 regs[n] = rd32(E1000_TDBAH(n));
303 break;
304 case E1000_TDLEN(0):
305 for (n = 0; n < 4; n++)
306 regs[n] = rd32(E1000_TDLEN(n));
307 break;
308 case E1000_TDH(0):
309 for (n = 0; n < 4; n++)
310 regs[n] = rd32(E1000_TDH(n));
311 break;
312 case E1000_TDT(0):
313 for (n = 0; n < 4; n++)
314 regs[n] = rd32(E1000_TDT(n));
315 break;
316 case E1000_TXDCTL(0):
317 for (n = 0; n < 4; n++)
318 regs[n] = rd32(E1000_TXDCTL(n));
319 break;
320 default:
321 printk(KERN_INFO "%-15s %08x\n",
322 reginfo->name, rd32(reginfo->ofs));
323 return;
324 }
325
326 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327 printk(KERN_INFO "%-15s ", rname);
328 for (n = 0; n < 4; n++)
329 printk(KERN_CONT "%08x ", regs[n]);
330 printk(KERN_CONT "\n");
331 }
332
333 /*
334 * igb_dump - Print registers, tx-rings and rx-rings
335 */
336 static void igb_dump(struct igb_adapter *adapter)
337 {
338 struct net_device *netdev = adapter->netdev;
339 struct e1000_hw *hw = &adapter->hw;
340 struct igb_reg_info *reginfo;
341 struct igb_ring *tx_ring;
342 union e1000_adv_tx_desc *tx_desc;
343 struct my_u0 { u64 a; u64 b; } *u0;
344 struct igb_ring *rx_ring;
345 union e1000_adv_rx_desc *rx_desc;
346 u32 staterr;
347 u16 i, n;
348
349 if (!netif_msg_hw(adapter))
350 return;
351
352 /* Print netdevice Info */
353 if (netdev) {
354 dev_info(&adapter->pdev->dev, "Net device Info\n");
355 printk(KERN_INFO "Device Name state "
356 "trans_start last_rx\n");
357 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
358 netdev->name,
359 netdev->state,
360 netdev->trans_start,
361 netdev->last_rx);
362 }
363
364 /* Print Registers */
365 dev_info(&adapter->pdev->dev, "Register Dump\n");
366 printk(KERN_INFO " Register Name Value\n");
367 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
368 reginfo->name; reginfo++) {
369 igb_regdump(hw, reginfo);
370 }
371
372 /* Print TX Ring Summary */
373 if (!netdev || !netif_running(netdev))
374 goto exit;
375
376 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
377 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
378 " leng ntw timestamp\n");
379 for (n = 0; n < adapter->num_tx_queues; n++) {
380 struct igb_tx_buffer *buffer_info;
381 tx_ring = adapter->tx_ring[n];
382 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
383 printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
384 n, tx_ring->next_to_use, tx_ring->next_to_clean,
385 (u64)buffer_info->dma,
386 buffer_info->length,
387 buffer_info->next_to_watch,
388 (u64)buffer_info->time_stamp);
389 }
390
391 /* Print TX Rings */
392 if (!netif_msg_tx_done(adapter))
393 goto rx_ring_summary;
394
395 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
396
397 /* Transmit Descriptor Formats
398 *
399 * Advanced Transmit Descriptor
400 * +--------------------------------------------------------------+
401 * 0 | Buffer Address [63:0] |
402 * +--------------------------------------------------------------+
403 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
404 * +--------------------------------------------------------------+
405 * 63 46 45 40 39 38 36 35 32 31 24 15 0
406 */
407
408 for (n = 0; n < adapter->num_tx_queues; n++) {
409 tx_ring = adapter->tx_ring[n];
410 printk(KERN_INFO "------------------------------------\n");
411 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
412 printk(KERN_INFO "------------------------------------\n");
413 printk(KERN_INFO "T [desc] [address 63:0 ] "
414 "[PlPOCIStDDM Ln] [bi->dma ] "
415 "leng ntw timestamp bi->skb\n");
416
417 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
418 struct igb_tx_buffer *buffer_info;
419 tx_desc = IGB_TX_DESC(tx_ring, i);
420 buffer_info = &tx_ring->tx_buffer_info[i];
421 u0 = (struct my_u0 *)tx_desc;
422 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
423 " %04X %p %016llX %p", i,
424 le64_to_cpu(u0->a),
425 le64_to_cpu(u0->b),
426 (u64)buffer_info->dma,
427 buffer_info->length,
428 buffer_info->next_to_watch,
429 (u64)buffer_info->time_stamp,
430 buffer_info->skb);
431 if (i == tx_ring->next_to_use &&
432 i == tx_ring->next_to_clean)
433 printk(KERN_CONT " NTC/U\n");
434 else if (i == tx_ring->next_to_use)
435 printk(KERN_CONT " NTU\n");
436 else if (i == tx_ring->next_to_clean)
437 printk(KERN_CONT " NTC\n");
438 else
439 printk(KERN_CONT "\n");
440
441 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
442 print_hex_dump(KERN_INFO, "",
443 DUMP_PREFIX_ADDRESS,
444 16, 1, phys_to_virt(buffer_info->dma),
445 buffer_info->length, true);
446 }
447 }
448
449 /* Print RX Rings Summary */
450 rx_ring_summary:
451 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
452 printk(KERN_INFO "Queue [NTU] [NTC]\n");
453 for (n = 0; n < adapter->num_rx_queues; n++) {
454 rx_ring = adapter->rx_ring[n];
455 printk(KERN_INFO " %5d %5X %5X\n", n,
456 rx_ring->next_to_use, rx_ring->next_to_clean);
457 }
458
459 /* Print RX Rings */
460 if (!netif_msg_rx_status(adapter))
461 goto exit;
462
463 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
464
465 /* Advanced Receive Descriptor (Read) Format
466 * 63 1 0
467 * +-----------------------------------------------------+
468 * 0 | Packet Buffer Address [63:1] |A0/NSE|
469 * +----------------------------------------------+------+
470 * 8 | Header Buffer Address [63:1] | DD |
471 * +-----------------------------------------------------+
472 *
473 *
474 * Advanced Receive Descriptor (Write-Back) Format
475 *
476 * 63 48 47 32 31 30 21 20 17 16 4 3 0
477 * +------------------------------------------------------+
478 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
479 * | Checksum Ident | | | | Type | Type |
480 * +------------------------------------------------------+
481 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
482 * +------------------------------------------------------+
483 * 63 48 47 32 31 20 19 0
484 */
485
486 for (n = 0; n < adapter->num_rx_queues; n++) {
487 rx_ring = adapter->rx_ring[n];
488 printk(KERN_INFO "------------------------------------\n");
489 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
490 printk(KERN_INFO "------------------------------------\n");
491 printk(KERN_INFO "R [desc] [ PktBuf A0] "
492 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
493 "<-- Adv Rx Read format\n");
494 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
495 "[vl er S cks ln] ---------------- [bi->skb] "
496 "<-- Adv Rx Write-Back format\n");
497
498 for (i = 0; i < rx_ring->count; i++) {
499 struct igb_rx_buffer *buffer_info;
500 buffer_info = &rx_ring->rx_buffer_info[i];
501 rx_desc = IGB_RX_DESC(rx_ring, i);
502 u0 = (struct my_u0 *)rx_desc;
503 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
504 if (staterr & E1000_RXD_STAT_DD) {
505 /* Descriptor Done */
506 printk(KERN_INFO "RWB[0x%03X] %016llX "
507 "%016llX ---------------- %p", i,
508 le64_to_cpu(u0->a),
509 le64_to_cpu(u0->b),
510 buffer_info->skb);
511 } else {
512 printk(KERN_INFO "R [0x%03X] %016llX "
513 "%016llX %016llX %p", i,
514 le64_to_cpu(u0->a),
515 le64_to_cpu(u0->b),
516 (u64)buffer_info->dma,
517 buffer_info->skb);
518
519 if (netif_msg_pktdata(adapter)) {
520 print_hex_dump(KERN_INFO, "",
521 DUMP_PREFIX_ADDRESS,
522 16, 1,
523 phys_to_virt(buffer_info->dma),
524 IGB_RX_HDR_LEN, true);
525 print_hex_dump(KERN_INFO, "",
526 DUMP_PREFIX_ADDRESS,
527 16, 1,
528 phys_to_virt(
529 buffer_info->page_dma +
530 buffer_info->page_offset),
531 PAGE_SIZE/2, true);
532 }
533 }
534
535 if (i == rx_ring->next_to_use)
536 printk(KERN_CONT " NTU\n");
537 else if (i == rx_ring->next_to_clean)
538 printk(KERN_CONT " NTC\n");
539 else
540 printk(KERN_CONT "\n");
541
542 }
543 }
544
545 exit:
546 return;
547 }
548
549
550 /**
551 * igb_read_clock - read raw cycle counter (to be used by time counter)
552 */
553 static cycle_t igb_read_clock(const struct cyclecounter *tc)
554 {
555 struct igb_adapter *adapter =
556 container_of(tc, struct igb_adapter, cycles);
557 struct e1000_hw *hw = &adapter->hw;
558 u64 stamp = 0;
559 int shift = 0;
560
561 /*
562 * The timestamp latches on lowest register read. For the 82580
563 * the lowest register is SYSTIMR instead of SYSTIML. However we never
564 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
565 */
566 if (hw->mac.type == e1000_82580) {
567 stamp = rd32(E1000_SYSTIMR) >> 8;
568 shift = IGB_82580_TSYNC_SHIFT;
569 }
570
571 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
572 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
573 return stamp;
574 }
575
576 /**
577 * igb_get_hw_dev - return device
578 * used by hardware layer to print debugging information
579 **/
580 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
581 {
582 struct igb_adapter *adapter = hw->back;
583 return adapter->netdev;
584 }
585
586 /**
587 * igb_init_module - Driver Registration Routine
588 *
589 * igb_init_module is the first routine called when the driver is
590 * loaded. All it does is register with the PCI subsystem.
591 **/
592 static int __init igb_init_module(void)
593 {
594 int ret;
595 printk(KERN_INFO "%s - version %s\n",
596 igb_driver_string, igb_driver_version);
597
598 printk(KERN_INFO "%s\n", igb_copyright);
599
600 #ifdef CONFIG_IGB_DCA
601 dca_register_notify(&dca_notifier);
602 #endif
603 ret = pci_register_driver(&igb_driver);
604 return ret;
605 }
606
607 module_init(igb_init_module);
608
609 /**
610 * igb_exit_module - Driver Exit Cleanup Routine
611 *
612 * igb_exit_module is called just before the driver is removed
613 * from memory.
614 **/
615 static void __exit igb_exit_module(void)
616 {
617 #ifdef CONFIG_IGB_DCA
618 dca_unregister_notify(&dca_notifier);
619 #endif
620 pci_unregister_driver(&igb_driver);
621 }
622
623 module_exit(igb_exit_module);
624
625 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
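/* e.g. i = 0, 1, 2, 3 maps to queue register offsets 0, 8, 1, 9, so VF n
 * ends up with queues n and n + 8, matching the comment in
 * igb_cache_ring_register() below. */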
626 /**
627 * igb_cache_ring_register - Descriptor ring to register mapping
628 * @adapter: board private structure to initialize
629 *
630 * Once we know the feature-set enabled for the device, we'll cache
631 * the register offset the descriptor ring is assigned to.
632 **/
633 static void igb_cache_ring_register(struct igb_adapter *adapter)
634 {
635 int i = 0, j = 0;
636 u32 rbase_offset = adapter->vfs_allocated_count;
637
638 switch (adapter->hw.mac.type) {
639 case e1000_82576:
640 /* The queues are allocated for virtualization such that VF 0
641 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
642 * In order to avoid collision we start at the first free queue
643 * and continue consuming queues in the same sequence
644 */
645 if (adapter->vfs_allocated_count) {
646 for (; i < adapter->rss_queues; i++)
647 adapter->rx_ring[i]->reg_idx = rbase_offset +
648 Q_IDX_82576(i);
649 }
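/* Fall through: any remaining Rx queues and all Tx queues use the
 * linear reg_idx mapping below. */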
650 case e1000_82575:
651 case e1000_82580:
652 case e1000_i350:
653 default:
654 for (; i < adapter->num_rx_queues; i++)
655 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
656 for (; j < adapter->num_tx_queues; j++)
657 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
658 break;
659 }
660 }
661
662 static void igb_free_queues(struct igb_adapter *adapter)
663 {
664 int i;
665
666 for (i = 0; i < adapter->num_tx_queues; i++) {
667 kfree(adapter->tx_ring[i]);
668 adapter->tx_ring[i] = NULL;
669 }
670 for (i = 0; i < adapter->num_rx_queues; i++) {
671 kfree(adapter->rx_ring[i]);
672 adapter->rx_ring[i] = NULL;
673 }
674 adapter->num_rx_queues = 0;
675 adapter->num_tx_queues = 0;
676 }
677
678 /**
679 * igb_alloc_queues - Allocate memory for all rings
680 * @adapter: board private structure to initialize
681 *
682 * We allocate one ring per queue at run-time since we don't know the
683 * number of queues at compile-time.
684 **/
685 static int igb_alloc_queues(struct igb_adapter *adapter)
686 {
687 struct igb_ring *ring;
688 int i;
689 int orig_node = adapter->node;
690
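/* When no NUMA node is specified (adapter->node == -1), walk the online
 * nodes round-robin so ring memory is spread across NUMA nodes. */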
691 for (i = 0; i < adapter->num_tx_queues; i++) {
692 if (orig_node == -1) {
693 int cur_node = next_online_node(adapter->node);
694 if (cur_node == MAX_NUMNODES)
695 cur_node = first_online_node;
696 adapter->node = cur_node;
697 }
698 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699 adapter->node);
700 if (!ring)
701 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702 if (!ring)
703 goto err;
704 ring->count = adapter->tx_ring_count;
705 ring->queue_index = i;
706 ring->dev = &adapter->pdev->dev;
707 ring->netdev = adapter->netdev;
708 ring->numa_node = adapter->node;
709 /* For 82575, context index must be unique per ring. */
710 if (adapter->hw.mac.type == e1000_82575)
711 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
712 adapter->tx_ring[i] = ring;
713 }
714 /* Restore the adapter's original node */
715 adapter->node = orig_node;
716
717 for (i = 0; i < adapter->num_rx_queues; i++) {
718 if (orig_node == -1) {
719 int cur_node = next_online_node(adapter->node);
720 if (cur_node == MAX_NUMNODES)
721 cur_node = first_online_node;
722 adapter->node = cur_node;
723 }
724 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725 adapter->node);
726 if (!ring)
727 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728 if (!ring)
729 goto err;
730 ring->count = adapter->rx_ring_count;
731 ring->queue_index = i;
732 ring->dev = &adapter->pdev->dev;
733 ring->netdev = adapter->netdev;
734 ring->numa_node = adapter->node;
735 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
736 /* set flag indicating ring supports SCTP checksum offload */
737 if (adapter->hw.mac.type >= e1000_82576)
738 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
739 adapter->rx_ring[i] = ring;
740 }
741 /* Restore the adapter's original node */
742 adapter->node = orig_node;
743
744 igb_cache_ring_register(adapter);
745
746 return 0;
747
748 err:
749 /* Restore the adapter's original node */
750 adapter->node = orig_node;
751 igb_free_queues(adapter);
752
753 return -ENOMEM;
754 }
755
756 #define IGB_N0_QUEUE -1
757 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
758 {
759 u32 msixbm = 0;
760 struct igb_adapter *adapter = q_vector->adapter;
761 struct e1000_hw *hw = &adapter->hw;
762 u32 ivar, index;
763 int rx_queue = IGB_N0_QUEUE;
764 int tx_queue = IGB_N0_QUEUE;
765
766 if (q_vector->rx_ring)
767 rx_queue = q_vector->rx_ring->reg_idx;
768 if (q_vector->tx_ring)
769 tx_queue = q_vector->tx_ring->reg_idx;
770
771 switch (hw->mac.type) {
772 case e1000_82575:
773 /* The 82575 assigns vectors using a bitmask, which matches the
774 bitmask for the EICR/EIMS/EIMC registers. To assign one
775 or more queues to a vector, we write the appropriate bits
776 into the MSIXBM register for that vector. */
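/* e.g. a vector serving Rx queue 1 and Tx queue 1 would write
 * (E1000_EICR_RX_QUEUE0 << 1) | (E1000_EICR_TX_QUEUE0 << 1). */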
777 if (rx_queue > IGB_N0_QUEUE)
778 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
779 if (tx_queue > IGB_N0_QUEUE)
780 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
781 if (!adapter->msix_entries && msix_vector == 0)
782 msixbm |= E1000_EIMS_OTHER;
783 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
784 q_vector->eims_value = msixbm;
785 break;
786 case e1000_82576:
787 /* 82576 uses a table-based method for assigning vectors.
788 Each queue has a single entry in the table to which we write
789 a vector number along with a "valid" bit. Sadly, the layout
790 of the table is somewhat counterintuitive. */
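/* Each 32-bit IVAR0 entry (index = queue & 0x7) holds four 8-bit fields:
 * byte 0 -> Rx queue 'index', byte 1 -> Tx queue 'index',
 * byte 2 -> Rx queue 'index + 8', byte 3 -> Tx queue 'index + 8';
 * the masking and shifting below fills in the matching byte. */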
791 if (rx_queue > IGB_N0_QUEUE) {
792 index = (rx_queue & 0x7);
793 ivar = array_rd32(E1000_IVAR0, index);
794 if (rx_queue < 8) {
795 /* vector goes into low byte of register */
796 ivar = ivar & 0xFFFFFF00;
797 ivar |= msix_vector | E1000_IVAR_VALID;
798 } else {
799 /* vector goes into third byte of register */
800 ivar = ivar & 0xFF00FFFF;
801 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
802 }
803 array_wr32(E1000_IVAR0, index, ivar);
804 }
805 if (tx_queue > IGB_N0_QUEUE) {
806 index = (tx_queue & 0x7);
807 ivar = array_rd32(E1000_IVAR0, index);
808 if (tx_queue < 8) {
809 /* vector goes into second byte of register */
810 ivar = ivar & 0xFFFF00FF;
811 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
812 } else {
813 /* vector goes into high byte of register */
814 ivar = ivar & 0x00FFFFFF;
815 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
816 }
817 array_wr32(E1000_IVAR0, index, ivar);
818 }
819 q_vector->eims_value = 1 << msix_vector;
820 break;
821 case e1000_82580:
822 case e1000_i350:
823 /* The 82580 uses the same table-based approach as the 82576 but has
824 fewer entries; as a result two queues share each IVAR entry. */
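/* Here index = queue >> 1: the even queue of each pair uses bytes 0 (Rx)
 * and 1 (Tx) of the entry, the odd queue uses bytes 2 (Rx) and 3 (Tx). */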
825 if (rx_queue > IGB_N0_QUEUE) {
826 index = (rx_queue >> 1);
827 ivar = array_rd32(E1000_IVAR0, index);
828 if (rx_queue & 0x1) {
829 /* vector goes into third byte of register */
830 ivar = ivar & 0xFF00FFFF;
831 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
832 } else {
833 /* vector goes into low byte of register */
834 ivar = ivar & 0xFFFFFF00;
835 ivar |= msix_vector | E1000_IVAR_VALID;
836 }
837 array_wr32(E1000_IVAR0, index, ivar);
838 }
839 if (tx_queue > IGB_N0_QUEUE) {
840 index = (tx_queue >> 1);
841 ivar = array_rd32(E1000_IVAR0, index);
842 if (tx_queue & 0x1) {
843 /* vector goes into high byte of register */
844 ivar = ivar & 0x00FFFFFF;
845 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
846 } else {
847 /* vector goes into second byte of register */
848 ivar = ivar & 0xFFFF00FF;
849 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
850 }
851 array_wr32(E1000_IVAR0, index, ivar);
852 }
853 q_vector->eims_value = 1 << msix_vector;
854 break;
855 default:
856 BUG();
857 break;
858 }
859
860 /* add q_vector eims value to global eims_enable_mask */
861 adapter->eims_enable_mask |= q_vector->eims_value;
862
863 /* configure q_vector to set itr on first interrupt */
864 q_vector->set_itr = 1;
865 }
866
867 /**
868 * igb_configure_msix - Configure MSI-X hardware
869 *
870 * igb_configure_msix sets up the hardware to properly
871 * generate MSI-X interrupts.
872 **/
873 static void igb_configure_msix(struct igb_adapter *adapter)
874 {
875 u32 tmp;
876 int i, vector = 0;
877 struct e1000_hw *hw = &adapter->hw;
878
879 adapter->eims_enable_mask = 0;
880
881 /* set vector for other causes, i.e. link changes */
882 switch (hw->mac.type) {
883 case e1000_82575:
884 tmp = rd32(E1000_CTRL_EXT);
885 /* enable MSI-X PBA support*/
886 tmp |= E1000_CTRL_EXT_PBA_CLR;
887
888 /* Auto-Mask interrupts upon ICR read. */
889 tmp |= E1000_CTRL_EXT_EIAME;
890 tmp |= E1000_CTRL_EXT_IRCA;
891
892 wr32(E1000_CTRL_EXT, tmp);
893
894 /* enable msix_other interrupt */
895 array_wr32(E1000_MSIXBM(0), vector++,
896 E1000_EIMS_OTHER);
897 adapter->eims_other = E1000_EIMS_OTHER;
898
899 break;
900
901 case e1000_82576:
902 case e1000_82580:
903 case e1000_i350:
904 /* Turn on MSI-X capability first, or our settings
905 * won't stick. And it will take days to debug. */
906 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
907 E1000_GPIE_PBA | E1000_GPIE_EIAME |
908 E1000_GPIE_NSICR);
909
910 /* enable msix_other interrupt */
911 adapter->eims_other = 1 << vector;
912 tmp = (vector++ | E1000_IVAR_VALID) << 8;
913
914 wr32(E1000_IVAR_MISC, tmp);
915 break;
916 default:
917 /* do nothing, since nothing else supports MSI-X */
918 break;
919 } /* switch (hw->mac.type) */
920
921 adapter->eims_enable_mask |= adapter->eims_other;
922
923 for (i = 0; i < adapter->num_q_vectors; i++)
924 igb_assign_vector(adapter->q_vector[i], vector++);
925
926 wrfl();
927 }
928
929 /**
930 * igb_request_msix - Initialize MSI-X interrupts
931 *
932 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
933 * kernel.
934 **/
935 static int igb_request_msix(struct igb_adapter *adapter)
936 {
937 struct net_device *netdev = adapter->netdev;
938 struct e1000_hw *hw = &adapter->hw;
939 int i, err = 0, vector = 0;
940
941 err = request_irq(adapter->msix_entries[vector].vector,
942 igb_msix_other, 0, netdev->name, adapter);
943 if (err)
944 goto out;
945 vector++;
946
947 for (i = 0; i < adapter->num_q_vectors; i++) {
948 struct igb_q_vector *q_vector = adapter->q_vector[i];
949
950 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
951
952 if (q_vector->rx_ring && q_vector->tx_ring)
953 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
954 q_vector->rx_ring->queue_index);
955 else if (q_vector->tx_ring)
956 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
957 q_vector->tx_ring->queue_index);
958 else if (q_vector->rx_ring)
959 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
960 q_vector->rx_ring->queue_index);
961 else
962 sprintf(q_vector->name, "%s-unused", netdev->name);
963
964 err = request_irq(adapter->msix_entries[vector].vector,
965 igb_msix_ring, 0, q_vector->name,
966 q_vector);
967 if (err)
968 goto out;
969 vector++;
970 }
971
972 igb_configure_msix(adapter);
973 return 0;
974 out:
975 return err;
976 }
977
978 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
979 {
980 if (adapter->msix_entries) {
981 pci_disable_msix(adapter->pdev);
982 kfree(adapter->msix_entries);
983 adapter->msix_entries = NULL;
984 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
985 pci_disable_msi(adapter->pdev);
986 }
987 }
988
989 /**
990 * igb_free_q_vectors - Free memory allocated for interrupt vectors
991 * @adapter: board private structure to initialize
992 *
993 * This function frees the memory allocated to the q_vectors. In addition if
994 * NAPI is enabled it will delete any references to the NAPI struct prior
995 * to freeing the q_vector.
996 **/
997 static void igb_free_q_vectors(struct igb_adapter *adapter)
998 {
999 int v_idx;
1000
1001 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1002 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1003 adapter->q_vector[v_idx] = NULL;
1004 if (!q_vector)
1005 continue;
1006 netif_napi_del(&q_vector->napi);
1007 kfree(q_vector);
1008 }
1009 adapter->num_q_vectors = 0;
1010 }
1011
1012 /**
1013 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1014 *
1015 * This function resets the device so that it has 0 rx queues, tx queues, and
1016 * MSI-X interrupts allocated.
1017 */
1018 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1019 {
1020 igb_free_queues(adapter);
1021 igb_free_q_vectors(adapter);
1022 igb_reset_interrupt_capability(adapter);
1023 }
1024
1025 /**
1026 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1027 *
1028 * Attempt to configure interrupts using the best available
1029 * capabilities of the hardware and kernel.
1030 **/
1031 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1032 {
1033 int err;
1034 int numvecs, i;
1035
1036 /* Number of supported queues. */
1037 adapter->num_rx_queues = adapter->rss_queues;
1038 if (adapter->vfs_allocated_count)
1039 adapter->num_tx_queues = 1;
1040 else
1041 adapter->num_tx_queues = adapter->rss_queues;
1042
1043 /* start with one vector for every rx queue */
1044 numvecs = adapter->num_rx_queues;
1045
1046 /* if tx handler is separate add 1 for every tx queue */
1047 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1048 numvecs += adapter->num_tx_queues;
1049
1050 /* store the number of vectors reserved for queues */
1051 adapter->num_q_vectors = numvecs;
1052
1053 /* add 1 vector for link status interrupts */
1054 numvecs++;
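/* For illustration: with 4 RSS queues and queue pairing disabled this
 * requests 4 Rx + 4 Tx + 1 link/other = 9 vectors; with pairing enabled
 * it would be 4 + 1 = 5. */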
1055 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1056 GFP_KERNEL);
1057 if (!adapter->msix_entries)
1058 goto msi_only;
1059
1060 for (i = 0; i < numvecs; i++)
1061 adapter->msix_entries[i].entry = i;
1062
1063 err = pci_enable_msix(adapter->pdev,
1064 adapter->msix_entries,
1065 numvecs);
1066 if (err == 0)
1067 goto out;
1068
1069 igb_reset_interrupt_capability(adapter);
1070
1071 /* If we can't do MSI-X, try MSI */
1072 msi_only:
1073 #ifdef CONFIG_PCI_IOV
1074 /* disable SR-IOV for non MSI-X configurations */
1075 if (adapter->vf_data) {
1076 struct e1000_hw *hw = &adapter->hw;
1077 /* disable iov and allow time for transactions to clear */
1078 pci_disable_sriov(adapter->pdev);
1079 msleep(500);
1080
1081 kfree(adapter->vf_data);
1082 adapter->vf_data = NULL;
1083 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1084 wrfl();
1085 msleep(100);
1086 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1087 }
1088 #endif
1089 adapter->vfs_allocated_count = 0;
1090 adapter->rss_queues = 1;
1091 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1092 adapter->num_rx_queues = 1;
1093 adapter->num_tx_queues = 1;
1094 adapter->num_q_vectors = 1;
1095 if (!pci_enable_msi(adapter->pdev))
1096 adapter->flags |= IGB_FLAG_HAS_MSI;
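/* If MSI also fails, igb_request_irq() falls back to a shared legacy
 * INTx interrupt. */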
1097 out:
1098 /* Notify the stack of the (possibly) reduced queue counts. */
1099 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1100 return netif_set_real_num_rx_queues(adapter->netdev,
1101 adapter->num_rx_queues);
1102 }
1103
1104 /**
1105 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1106 * @adapter: board private structure to initialize
1107 *
1108 * We allocate one q_vector per queue interrupt. If allocation fails we
1109 * return -ENOMEM.
1110 **/
1111 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1112 {
1113 struct igb_q_vector *q_vector;
1114 struct e1000_hw *hw = &adapter->hw;
1115 int v_idx;
1116 int orig_node = adapter->node;
1117
1118 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1119 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1120 adapter->num_tx_queues)) &&
1121 (adapter->num_rx_queues == v_idx))
1122 adapter->node = orig_node;
1123 if (orig_node == -1) {
1124 int cur_node = next_online_node(adapter->node);
1125 if (cur_node == MAX_NUMNODES)
1126 cur_node = first_online_node;
1127 adapter->node = cur_node;
1128 }
1129 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1130 adapter->node);
1131 if (!q_vector)
1132 q_vector = kzalloc(sizeof(struct igb_q_vector),
1133 GFP_KERNEL);
1134 if (!q_vector)
1135 goto err_out;
1136 q_vector->adapter = adapter;
1137 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1138 q_vector->itr_val = IGB_START_ITR;
1139 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1140 adapter->q_vector[v_idx] = q_vector;
1141 }
1142 /* Restore the adapter's original node */
1143 adapter->node = orig_node;
1144
1145 return 0;
1146
1147 err_out:
1148 /* Restore the adapter's original node */
1149 adapter->node = orig_node;
1150 igb_free_q_vectors(adapter);
1151 return -ENOMEM;
1152 }
1153
1154 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1155 int ring_idx, int v_idx)
1156 {
1157 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1158
1159 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1160 q_vector->rx_ring->q_vector = q_vector;
1161 q_vector->itr_val = adapter->rx_itr_setting;
1162 if (q_vector->itr_val && q_vector->itr_val <= 3)
1163 q_vector->itr_val = IGB_START_ITR;
1164 }
1165
1166 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1167 int ring_idx, int v_idx)
1168 {
1169 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1170
1171 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1172 q_vector->tx_ring->q_vector = q_vector;
1173 q_vector->itr_val = adapter->tx_itr_setting;
1174 q_vector->tx_work_limit = adapter->tx_work_limit;
1175 if (q_vector->itr_val && q_vector->itr_val <= 3)
1176 q_vector->itr_val = IGB_START_ITR;
1177 }
1178
1179 /**
1180 * igb_map_ring_to_vector - maps allocated queues to vectors
1181 *
1182 * This function maps the recently allocated queues to vectors.
1183 **/
1184 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1185 {
1186 int i;
1187 int v_idx = 0;
1188
1189 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1190 (adapter->num_q_vectors < adapter->num_tx_queues))
1191 return -ENOMEM;
1192
1193 if (adapter->num_q_vectors >=
1194 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1195 for (i = 0; i < adapter->num_rx_queues; i++)
1196 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1197 for (i = 0; i < adapter->num_tx_queues; i++)
1198 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1199 } else {
1200 for (i = 0; i < adapter->num_rx_queues; i++) {
1201 if (i < adapter->num_tx_queues)
1202 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1203 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1204 }
1205 for (; i < adapter->num_tx_queues; i++)
1206 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1207 }
1208 return 0;
1209 }
1210
1211 /**
1212 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1213 *
1214 * This function initializes the interrupts and allocates all of the queues.
1215 **/
1216 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1217 {
1218 struct pci_dev *pdev = adapter->pdev;
1219 int err;
1220
1221 err = igb_set_interrupt_capability(adapter);
1222 if (err)
1223 return err;
1224
1225 err = igb_alloc_q_vectors(adapter);
1226 if (err) {
1227 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1228 goto err_alloc_q_vectors;
1229 }
1230
1231 err = igb_alloc_queues(adapter);
1232 if (err) {
1233 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1234 goto err_alloc_queues;
1235 }
1236
1237 err = igb_map_ring_to_vector(adapter);
1238 if (err) {
1239 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1240 goto err_map_queues;
1241 }
1242
1243
1244 return 0;
1245 err_map_queues:
1246 igb_free_queues(adapter);
1247 err_alloc_queues:
1248 igb_free_q_vectors(adapter);
1249 err_alloc_q_vectors:
1250 igb_reset_interrupt_capability(adapter);
1251 return err;
1252 }
1253
1254 /**
1255 * igb_request_irq - initialize interrupts
1256 *
1257 * Attempts to configure interrupts using the best available
1258 * capabilities of the hardware and kernel.
1259 **/
1260 static int igb_request_irq(struct igb_adapter *adapter)
1261 {
1262 struct net_device *netdev = adapter->netdev;
1263 struct pci_dev *pdev = adapter->pdev;
1264 int err = 0;
1265
1266 if (adapter->msix_entries) {
1267 err = igb_request_msix(adapter);
1268 if (!err)
1269 goto request_done;
1270 /* fall back to MSI */
1271 igb_clear_interrupt_scheme(adapter);
1272 if (!pci_enable_msi(adapter->pdev))
1273 adapter->flags |= IGB_FLAG_HAS_MSI;
1274 igb_free_all_tx_resources(adapter);
1275 igb_free_all_rx_resources(adapter);
1276 adapter->num_tx_queues = 1;
1277 adapter->num_rx_queues = 1;
1278 adapter->num_q_vectors = 1;
1279 err = igb_alloc_q_vectors(adapter);
1280 if (err) {
1281 dev_err(&pdev->dev,
1282 "Unable to allocate memory for vectors\n");
1283 goto request_done;
1284 }
1285 err = igb_alloc_queues(adapter);
1286 if (err) {
1287 dev_err(&pdev->dev,
1288 "Unable to allocate memory for queues\n");
1289 igb_free_q_vectors(adapter);
1290 goto request_done;
1291 }
1292 igb_setup_all_tx_resources(adapter);
1293 igb_setup_all_rx_resources(adapter);
1294 } else {
1295 igb_assign_vector(adapter->q_vector[0], 0);
1296 }
1297
1298 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1299 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1300 netdev->name, adapter);
1301 if (!err)
1302 goto request_done;
1303
1304 /* fall back to legacy interrupts */
1305 igb_reset_interrupt_capability(adapter);
1306 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1307 }
1308
1309 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1310 netdev->name, adapter);
1311
1312 if (err)
1313 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1314 err);
1315
1316 request_done:
1317 return err;
1318 }
1319
1320 static void igb_free_irq(struct igb_adapter *adapter)
1321 {
1322 if (adapter->msix_entries) {
1323 int vector = 0, i;
1324
1325 free_irq(adapter->msix_entries[vector++].vector, adapter);
1326
1327 for (i = 0; i < adapter->num_q_vectors; i++) {
1328 struct igb_q_vector *q_vector = adapter->q_vector[i];
1329 free_irq(adapter->msix_entries[vector++].vector,
1330 q_vector);
1331 }
1332 } else {
1333 free_irq(adapter->pdev->irq, adapter);
1334 }
1335 }
1336
1337 /**
1338 * igb_irq_disable - Mask off interrupt generation on the NIC
1339 * @adapter: board private structure
1340 **/
1341 static void igb_irq_disable(struct igb_adapter *adapter)
1342 {
1343 struct e1000_hw *hw = &adapter->hw;
1344
1345 /*
1346 * we need to be careful when disabling interrupts. The VFs are also
1347 * mapped into these registers and so clearing the bits can cause
1348 * issues on the VF drivers so we only need to clear what we set
1349 */
1350 if (adapter->msix_entries) {
1351 u32 regval = rd32(E1000_EIAM);
1352 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1353 wr32(E1000_EIMC, adapter->eims_enable_mask);
1354 regval = rd32(E1000_EIAC);
1355 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1356 }
1357
1358 wr32(E1000_IAM, 0);
1359 wr32(E1000_IMC, ~0);
1360 wrfl();
1361 if (adapter->msix_entries) {
1362 int i;
1363 for (i = 0; i < adapter->num_q_vectors; i++)
1364 synchronize_irq(adapter->msix_entries[i].vector);
1365 } else {
1366 synchronize_irq(adapter->pdev->irq);
1367 }
1368 }
1369
1370 /**
1371 * igb_irq_enable - Enable default interrupt generation settings
1372 * @adapter: board private structure
1373 **/
1374 static void igb_irq_enable(struct igb_adapter *adapter)
1375 {
1376 struct e1000_hw *hw = &adapter->hw;
1377
1378 if (adapter->msix_entries) {
1379 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1380 u32 regval = rd32(E1000_EIAC);
1381 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1382 regval = rd32(E1000_EIAM);
1383 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1384 wr32(E1000_EIMS, adapter->eims_enable_mask);
1385 if (adapter->vfs_allocated_count) {
1386 wr32(E1000_MBVFIMR, 0xFF);
1387 ims |= E1000_IMS_VMMB;
1388 }
1389 if (adapter->hw.mac.type == e1000_82580)
1390 ims |= E1000_IMS_DRSTA;
1391
1392 wr32(E1000_IMS, ims);
1393 } else {
1394 wr32(E1000_IMS, IMS_ENABLE_MASK |
1395 E1000_IMS_DRSTA);
1396 wr32(E1000_IAM, IMS_ENABLE_MASK |
1397 E1000_IMS_DRSTA);
1398 }
1399 }
1400
1401 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1402 {
1403 struct e1000_hw *hw = &adapter->hw;
1404 u16 vid = adapter->hw.mng_cookie.vlan_id;
1405 u16 old_vid = adapter->mng_vlan_id;
1406
1407 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1408 /* add VID to filter table */
1409 igb_vfta_set(hw, vid, true);
1410 adapter->mng_vlan_id = vid;
1411 } else {
1412 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1413 }
1414
1415 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1416 (vid != old_vid) &&
1417 !test_bit(old_vid, adapter->active_vlans)) {
1418 /* remove VID from filter table */
1419 igb_vfta_set(hw, old_vid, false);
1420 }
1421 }
1422
1423 /**
1424 * igb_release_hw_control - release control of the h/w to f/w
1425 * @adapter: address of board private structure
1426 *
1427 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1428 * For ASF and Pass Through versions of f/w this means that the
1429 * driver is no longer loaded.
1430 *
1431 **/
1432 static void igb_release_hw_control(struct igb_adapter *adapter)
1433 {
1434 struct e1000_hw *hw = &adapter->hw;
1435 u32 ctrl_ext;
1436
1437 /* Let firmware take over control of h/w */
1438 ctrl_ext = rd32(E1000_CTRL_EXT);
1439 wr32(E1000_CTRL_EXT,
1440 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1441 }
1442
1443 /**
1444 * igb_get_hw_control - get control of the h/w from f/w
1445 * @adapter: address of board private structure
1446 *
1447 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1448 * For ASF and Pass Through versions of f/w this means that
1449 * the driver is loaded.
1450 *
1451 **/
1452 static void igb_get_hw_control(struct igb_adapter *adapter)
1453 {
1454 struct e1000_hw *hw = &adapter->hw;
1455 u32 ctrl_ext;
1456
1457 /* Let firmware know the driver has taken over */
1458 ctrl_ext = rd32(E1000_CTRL_EXT);
1459 wr32(E1000_CTRL_EXT,
1460 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1461 }
1462
1463 /**
1464 * igb_configure - configure the hardware for RX and TX
1465 * @adapter: private board structure
1466 **/
1467 static void igb_configure(struct igb_adapter *adapter)
1468 {
1469 struct net_device *netdev = adapter->netdev;
1470 int i;
1471
1472 igb_get_hw_control(adapter);
1473 igb_set_rx_mode(netdev);
1474
1475 igb_restore_vlan(adapter);
1476
1477 igb_setup_tctl(adapter);
1478 igb_setup_mrqc(adapter);
1479 igb_setup_rctl(adapter);
1480
1481 igb_configure_tx(adapter);
1482 igb_configure_rx(adapter);
1483
1484 igb_rx_fifo_flush_82575(&adapter->hw);
1485
1486 /* call igb_desc_unused which always leaves
1487 * at least 1 descriptor unused to make sure
1488 * next_to_use != next_to_clean */
1489 for (i = 0; i < adapter->num_rx_queues; i++) {
1490 struct igb_ring *ring = adapter->rx_ring[i];
1491 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1492 }
1493 }
1494
1495 /**
1496 * igb_power_up_link - Power up the phy/serdes link
1497 * @adapter: address of board private structure
1498 **/
1499 void igb_power_up_link(struct igb_adapter *adapter)
1500 {
1501 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1502 igb_power_up_phy_copper(&adapter->hw);
1503 else
1504 igb_power_up_serdes_link_82575(&adapter->hw);
1505 }
1506
1507 /**
1508 * igb_power_down_link - Power down the phy/serdes link
1509 * @adapter: address of board private structure
1510 */
1511 static void igb_power_down_link(struct igb_adapter *adapter)
1512 {
1513 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1514 igb_power_down_phy_copper_82575(&adapter->hw);
1515 else
1516 igb_shutdown_serdes_link_82575(&adapter->hw);
1517 }
1518
1519 /**
1520 * igb_up - Open the interface and prepare it to handle traffic
1521 * @adapter: board private structure
1522 **/
1523 int igb_up(struct igb_adapter *adapter)
1524 {
1525 struct e1000_hw *hw = &adapter->hw;
1526 int i;
1527
1528 /* hardware has been reset, we need to reload some things */
1529 igb_configure(adapter);
1530
1531 clear_bit(__IGB_DOWN, &adapter->state);
1532
1533 for (i = 0; i < adapter->num_q_vectors; i++) {
1534 struct igb_q_vector *q_vector = adapter->q_vector[i];
1535 napi_enable(&q_vector->napi);
1536 }
1537 if (adapter->msix_entries)
1538 igb_configure_msix(adapter);
1539 else
1540 igb_assign_vector(adapter->q_vector[0], 0);
1541
1542 /* Clear any pending interrupts. */
1543 rd32(E1000_ICR);
1544 igb_irq_enable(adapter);
1545
1546 /* notify VFs that reset has been completed */
1547 if (adapter->vfs_allocated_count) {
1548 u32 reg_data = rd32(E1000_CTRL_EXT);
1549 reg_data |= E1000_CTRL_EXT_PFRSTD;
1550 wr32(E1000_CTRL_EXT, reg_data);
1551 }
1552
1553 netif_tx_start_all_queues(adapter->netdev);
1554
1555 /* start the watchdog. */
1556 hw->mac.get_link_status = 1;
1557 schedule_work(&adapter->watchdog_task);
1558
1559 return 0;
1560 }
1561
1562 void igb_down(struct igb_adapter *adapter)
1563 {
1564 struct net_device *netdev = adapter->netdev;
1565 struct e1000_hw *hw = &adapter->hw;
1566 u32 tctl, rctl;
1567 int i;
1568
1569 /* signal that we're down so the interrupt handler does not
1570 * reschedule our watchdog timer */
1571 set_bit(__IGB_DOWN, &adapter->state);
1572
1573 /* disable receives in the hardware */
1574 rctl = rd32(E1000_RCTL);
1575 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1576 /* flush and sleep below */
1577
1578 netif_tx_stop_all_queues(netdev);
1579
1580 /* disable transmits in the hardware */
1581 tctl = rd32(E1000_TCTL);
1582 tctl &= ~E1000_TCTL_EN;
1583 wr32(E1000_TCTL, tctl);
1584 /* flush both disables and wait for them to finish */
1585 wrfl();
1586 msleep(10);
1587
1588 for (i = 0; i < adapter->num_q_vectors; i++) {
1589 struct igb_q_vector *q_vector = adapter->q_vector[i];
1590 napi_disable(&q_vector->napi);
1591 }
1592
1593 igb_irq_disable(adapter);
1594
1595 del_timer_sync(&adapter->watchdog_timer);
1596 del_timer_sync(&adapter->phy_info_timer);
1597
1598 netif_carrier_off(netdev);
1599
1600 /* record the stats before reset*/
1601 spin_lock(&adapter->stats64_lock);
1602 igb_update_stats(adapter, &adapter->stats64);
1603 spin_unlock(&adapter->stats64_lock);
1604
1605 adapter->link_speed = 0;
1606 adapter->link_duplex = 0;
1607
1608 if (!pci_channel_offline(adapter->pdev))
1609 igb_reset(adapter);
1610 igb_clean_all_tx_rings(adapter);
1611 igb_clean_all_rx_rings(adapter);
1612 #ifdef CONFIG_IGB_DCA
1613
1614 /* since we reset the hardware DCA settings were cleared */
1615 igb_setup_dca(adapter);
1616 #endif
1617 }
1618
1619 void igb_reinit_locked(struct igb_adapter *adapter)
1620 {
1621 WARN_ON(in_interrupt());
1622 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1623 msleep(1);
1624 igb_down(adapter);
1625 igb_up(adapter);
1626 clear_bit(__IGB_RESETTING, &adapter->state);
1627 }
1628
1629 void igb_reset(struct igb_adapter *adapter)
1630 {
1631 struct pci_dev *pdev = adapter->pdev;
1632 struct e1000_hw *hw = &adapter->hw;
1633 struct e1000_mac_info *mac = &hw->mac;
1634 struct e1000_fc_info *fc = &hw->fc;
1635 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1636 u16 hwm;
1637
1638 /* Repartition the PBA for MTUs greater than 9k.
1639 * CTRL.RST is required for the change to take effect.
1640 */
1641 switch (mac->type) {
1642 case e1000_i350:
1643 case e1000_82580:
1644 pba = rd32(E1000_RXPBS);
1645 pba = igb_rxpbs_adjust_82580(pba);
1646 break;
1647 case e1000_82576:
1648 pba = rd32(E1000_RXPBS);
1649 pba &= E1000_RXPBS_SIZE_MASK_82576;
1650 break;
1651 case e1000_82575:
1652 default:
1653 pba = E1000_PBA_34K;
1654 break;
1655 }
1656
1657 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1658 (mac->type < e1000_82576)) {
1659 /* adjust PBA for jumbo frames */
1660 wr32(E1000_PBA, pba);
1661
1662 /* To maintain wire speed transmits, the Tx FIFO should be
1663 * large enough to accommodate two full transmit packets,
1664 * rounded up to the next 1KB and expressed in KB. Likewise,
1665 * the Rx FIFO should be large enough to accommodate at least
1666 * one full receive packet and is similarly rounded up and
1667 * expressed in KB. */
1668 pba = rd32(E1000_PBA);
1669 /* upper 16 bits has Tx packet buffer allocation size in KB */
1670 tx_space = pba >> 16;
1671 /* lower 16 bits has Rx packet buffer allocation size in KB */
1672 pba &= 0xffff;
1673 /* the Tx FIFO also stores 16 bytes of information about each packet,
1674 * but doesn't include the Ethernet FCS because hardware appends it */
1675 min_tx_space = (adapter->max_frame_size +
1676 sizeof(union e1000_adv_tx_desc) -
1677 ETH_FCS_LEN) * 2;
1678 min_tx_space = ALIGN(min_tx_space, 1024);
1679 min_tx_space >>= 10;
1680 /* software strips receive CRC, so leave room for it */
1681 min_rx_space = adapter->max_frame_size;
1682 min_rx_space = ALIGN(min_rx_space, 1024);
1683 min_rx_space >>= 10;
1684
1685 /* If current Tx allocation is less than the min Tx FIFO size,
1686 * and the min Tx FIFO size is less than the current Rx FIFO
1687 * allocation, take space away from current Rx allocation */
1688 if (tx_space < min_tx_space &&
1689 ((min_tx_space - tx_space) < pba)) {
1690 pba = pba - (min_tx_space - tx_space);
1691
1692 /* if short on rx space, rx wins and must trump tx
1693 * adjustment */
1694 if (pba < min_rx_space)
1695 pba = min_rx_space;
1696 }
1697 wr32(E1000_PBA, pba);
1698 }
1699
1700 /* flow control settings */
1701 /* The high water mark must be low enough to fit one full frame
1702 * (or the size used for early receive) above it in the Rx FIFO.
1703 * Set it to the lower of:
1704 * - 90% of the Rx FIFO size, or
1705 * - the full Rx FIFO size minus one full frame */
1706 hwm = min(((pba << 10) * 9 / 10),
1707 ((pba << 10) - 2 * adapter->max_frame_size));
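/* For illustration, with the 82575 default 34 KB PBA and, say, a
 * 1522-byte max frame: min(34816 * 9 / 10, 34816 - 2 * 1522) =
 * min(31334, 31772) = 31334, masked below to 16-byte granularity. */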
1708
1709 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1710 fc->low_water = fc->high_water - 16;
1711 fc->pause_time = 0xFFFF;
1712 fc->send_xon = 1;
1713 fc->current_mode = fc->requested_mode;
1714
1715 /* disable receive for all VFs and wait one second */
1716 if (adapter->vfs_allocated_count) {
1717 int i;
1718 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1719 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1720
1721 /* ping all the active vfs to let them know we are going down */
1722 igb_ping_all_vfs(adapter);
1723
1724 /* disable transmits and receives */
1725 wr32(E1000_VFRE, 0);
1726 wr32(E1000_VFTE, 0);
1727 }
1728
1729 /* Allow time for pending master requests to run */
1730 hw->mac.ops.reset_hw(hw);
1731 wr32(E1000_WUC, 0);
1732
1733 if (hw->mac.ops.init_hw(hw))
1734 dev_err(&pdev->dev, "Hardware Error\n");
1735 if (hw->mac.type > e1000_82580) {
1736 if (adapter->flags & IGB_FLAG_DMAC) {
1737 u32 reg;
1738
1739 /*
1740 * DMA Coalescing high water mark needs to be higher
1741 * than the Rx threshold. The Rx threshold is currently
1742 * pba - 6, so we should use a high water mark of
1743 * pba - 4. */
1744 hwm = (pba - 4) << 10;
1745
1746 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1747 & E1000_DMACR_DMACTHR_MASK);
1748
1749 /* transition to L0s or L1 if available */
1750 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1751
1752 /* watchdog timer = 1000 usec, expressed in 32-usec units */
1753 reg |= (1000 >> 5);
1754 wr32(E1000_DMACR, reg);
1755
1756 /* no lower threshold to disable coalescing (smart FIFO)
1757 * - UTRESH = 0 */
1758 wr32(E1000_DMCRTRH, 0);
1759
1760 /* write the high water mark computed above */
1761 wr32(E1000_FCRTC, hwm);
1762
1763 /*
1764 * This sets the time to wait before requesting transition to a
1765 * low power state to the number of usecs needed to receive a
1766 * 512-byte frame at gigabit line rate
1767 */
1768 reg = rd32(E1000_DMCTLX);
1769 reg |= IGB_DMCTLX_DCFLUSH_DIS;
1770
1771 /* Delay 255 usec before entering Lx state. */
1772 reg |= 0xFF;
1773 wr32(E1000_DMCTLX, reg);
1774
1775 /* free space in Tx packet buffer to wake from DMAC */
1776 wr32(E1000_DMCTXTH,
1777 (IGB_MIN_TXPBSIZE -
1778 (IGB_TX_BUF_4096 + adapter->max_frame_size))
1779 >> 6);
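/* Note: the shift by 6 above presumably expresses the remaining Tx
 * packet buffer space in 64 byte units; IGB_MIN_TXPBSIZE,
 * IGB_TX_BUF_4096 and max_frame_size are byte counts. */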
1780
1781 /* make low power state decision controlled by DMAC */
1782 reg = rd32(E1000_PCIEMISC);
1783 reg |= E1000_PCIEMISC_LX_DECISION;
1784 wr32(E1000_PCIEMISC, reg);
1785 } /* end if IGB_FLAG_DMAC set */
1786 }
1787 if (hw->mac.type == e1000_82580) {
1788 u32 reg = rd32(E1000_PCIEMISC);
1789 wr32(E1000_PCIEMISC,
1790 reg & ~E1000_PCIEMISC_LX_DECISION);
1791 }
1792 if (!netif_running(adapter->netdev))
1793 igb_power_down_link(adapter);
1794
1795 igb_update_mng_vlan(adapter);
1796
1797 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1798 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1799
1800 igb_get_phy_info(hw);
1801 }
1802
1803 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1804 {
1805 /*
1806 * Since there is no support for separate rx/tx vlan accel
1807 * enable/disable make sure tx flag is always in same state as rx.
1808 */
1809 if (features & NETIF_F_HW_VLAN_RX)
1810 features |= NETIF_F_HW_VLAN_TX;
1811 else
1812 features &= ~NETIF_F_HW_VLAN_TX;
1813
1814 return features;
1815 }
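/* e.g. a request that sets NETIF_F_HW_VLAN_RX but clears
 * NETIF_F_HW_VLAN_TX comes back with both set, and a request that
 * clears RX also clears TX, keeping the two flags in lockstep. */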
1816
1817 static int igb_set_features(struct net_device *netdev, u32 features)
1818 {
1819 struct igb_adapter *adapter = netdev_priv(netdev);
1820 int i;
1821 u32 changed = netdev->features ^ features;
1822
1823 for (i = 0; i < adapter->num_rx_queues; i++) {
1824 if (features & NETIF_F_RXCSUM)
1825 adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1826 else
1827 adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1828 }
1829
1830 if (changed & NETIF_F_HW_VLAN_RX)
1831 igb_vlan_mode(netdev, features);
1832
1833 return 0;
1834 }
1835
1836 static const struct net_device_ops igb_netdev_ops = {
1837 .ndo_open = igb_open,
1838 .ndo_stop = igb_close,
1839 .ndo_start_xmit = igb_xmit_frame,
1840 .ndo_get_stats64 = igb_get_stats64,
1841 .ndo_set_rx_mode = igb_set_rx_mode,
1842 .ndo_set_mac_address = igb_set_mac,
1843 .ndo_change_mtu = igb_change_mtu,
1844 .ndo_do_ioctl = igb_ioctl,
1845 .ndo_tx_timeout = igb_tx_timeout,
1846 .ndo_validate_addr = eth_validate_addr,
1847 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1848 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1849 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1850 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1851 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1852 .ndo_get_vf_config = igb_ndo_get_vf_config,
1853 #ifdef CONFIG_NET_POLL_CONTROLLER
1854 .ndo_poll_controller = igb_netpoll,
1855 #endif
1856 .ndo_fix_features = igb_fix_features,
1857 .ndo_set_features = igb_set_features,
1858 };
1859
1860 /**
1861 * igb_probe - Device Initialization Routine
1862 * @pdev: PCI device information struct
1863 * @ent: entry in igb_pci_tbl
1864 *
1865 * Returns 0 on success, negative on failure
1866 *
1867 * igb_probe initializes an adapter identified by a pci_dev structure.
1868 * The OS initialization, configuring of the adapter private structure,
1869 * and a hardware reset occur.
1870 **/
1871 static int __devinit igb_probe(struct pci_dev *pdev,
1872 const struct pci_device_id *ent)
1873 {
1874 struct net_device *netdev;
1875 struct igb_adapter *adapter;
1876 struct e1000_hw *hw;
1877 u16 eeprom_data = 0;
1878 s32 ret_val;
1879 static int global_quad_port_a; /* global quad port a indication */
1880 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1881 unsigned long mmio_start, mmio_len;
1882 int err, pci_using_dac;
1883 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1884 u8 part_str[E1000_PBANUM_LENGTH];
1885
1886 /* Catch broken hardware that put the wrong VF device ID in
1887 * the PCIe SR-IOV capability.
1888 */
1889 if (pdev->is_virtfn) {
1890 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1891 pci_name(pdev), pdev->vendor, pdev->device);
1892 return -EINVAL;
1893 }
1894
1895 err = pci_enable_device_mem(pdev);
1896 if (err)
1897 return err;
1898
1899 pci_using_dac = 0;
1900 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1901 if (!err) {
1902 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1903 if (!err)
1904 pci_using_dac = 1;
1905 } else {
1906 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1907 if (err) {
1908 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1909 if (err) {
1910 dev_err(&pdev->dev, "No usable DMA "
1911 "configuration, aborting\n");
1912 goto err_dma;
1913 }
1914 }
1915 }
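/* Note: pci_using_dac records whether the 64 bit DMA mask was accepted;
 * it is used further below to decide whether NETIF_F_HIGHDMA is
 * advertised on the netdev and its VLAN features. */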
1916
1917 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1918 IORESOURCE_MEM),
1919 igb_driver_name);
1920 if (err)
1921 goto err_pci_reg;
1922
1923 pci_enable_pcie_error_reporting(pdev);
1924
1925 pci_set_master(pdev);
1926 pci_save_state(pdev);
1927
1928 err = -ENOMEM;
1929 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1930 IGB_MAX_TX_QUEUES);
1931 if (!netdev)
1932 goto err_alloc_etherdev;
1933
1934 SET_NETDEV_DEV(netdev, &pdev->dev);
1935
1936 pci_set_drvdata(pdev, netdev);
1937 adapter = netdev_priv(netdev);
1938 adapter->netdev = netdev;
1939 adapter->pdev = pdev;
1940 hw = &adapter->hw;
1941 hw->back = adapter;
1942 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1943
1944 mmio_start = pci_resource_start(pdev, 0);
1945 mmio_len = pci_resource_len(pdev, 0);
1946
1947 err = -EIO;
1948 hw->hw_addr = ioremap(mmio_start, mmio_len);
1949 if (!hw->hw_addr)
1950 goto err_ioremap;
1951
1952 netdev->netdev_ops = &igb_netdev_ops;
1953 igb_set_ethtool_ops(netdev);
1954 netdev->watchdog_timeo = 5 * HZ;
1955
1956 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1957
1958 netdev->mem_start = mmio_start;
1959 netdev->mem_end = mmio_start + mmio_len;
1960
1961 /* PCI config space info */
1962 hw->vendor_id = pdev->vendor;
1963 hw->device_id = pdev->device;
1964 hw->revision_id = pdev->revision;
1965 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1966 hw->subsystem_device_id = pdev->subsystem_device;
1967
1968 /* Copy the default MAC, PHY and NVM function pointers */
1969 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1970 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1971 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1972 /* Initialize skew-specific constants */
1973 err = ei->get_invariants(hw);
1974 if (err)
1975 goto err_sw_init;
1976
1977 /* setup the private structure */
1978 err = igb_sw_init(adapter);
1979 if (err)
1980 goto err_sw_init;
1981
1982 igb_get_bus_info_pcie(hw);
1983
1984 hw->phy.autoneg_wait_to_complete = false;
1985
1986 /* Copper options */
1987 if (hw->phy.media_type == e1000_media_type_copper) {
1988 hw->phy.mdix = AUTO_ALL_MODES;
1989 hw->phy.disable_polarity_correction = false;
1990 hw->phy.ms_type = e1000_ms_hw_default;
1991 }
1992
1993 if (igb_check_reset_block(hw))
1994 dev_info(&pdev->dev,
1995 "PHY reset is blocked due to SOL/IDER session.\n");
1996
1997 netdev->hw_features = NETIF_F_SG |
1998 NETIF_F_IP_CSUM |
1999 NETIF_F_IPV6_CSUM |
2000 NETIF_F_TSO |
2001 NETIF_F_TSO6 |
2002 NETIF_F_RXCSUM |
2003 NETIF_F_HW_VLAN_RX;
2004
2005 netdev->features = netdev->hw_features |
2006 NETIF_F_HW_VLAN_TX |
2007 NETIF_F_HW_VLAN_FILTER;
2008
2009 netdev->vlan_features |= NETIF_F_TSO;
2010 netdev->vlan_features |= NETIF_F_TSO6;
2011 netdev->vlan_features |= NETIF_F_IP_CSUM;
2012 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
2013 netdev->vlan_features |= NETIF_F_SG;
2014
2015 if (pci_using_dac) {
2016 netdev->features |= NETIF_F_HIGHDMA;
2017 netdev->vlan_features |= NETIF_F_HIGHDMA;
2018 }
2019
2020 if (hw->mac.type >= e1000_82576) {
2021 netdev->hw_features |= NETIF_F_SCTP_CSUM;
2022 netdev->features |= NETIF_F_SCTP_CSUM;
2023 }
2024
2025 netdev->priv_flags |= IFF_UNICAST_FLT;
2026
2027 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2028
2029 /* before reading the NVM, reset the controller to put the device in a
2030 * known good starting state */
2031 hw->mac.ops.reset_hw(hw);
2032
2033 /* make sure the NVM is good */
2034 if (hw->nvm.ops.validate(hw) < 0) {
2035 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2036 err = -EIO;
2037 goto err_eeprom;
2038 }
2039
2040 /* copy the MAC address out of the NVM */
2041 if (hw->mac.ops.read_mac_addr(hw))
2042 dev_err(&pdev->dev, "NVM Read Error\n");
2043
2044 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2045 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2046
2047 if (!is_valid_ether_addr(netdev->perm_addr)) {
2048 dev_err(&pdev->dev, "Invalid MAC Address\n");
2049 err = -EIO;
2050 goto err_eeprom;
2051 }
2052
2053 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2054 (unsigned long) adapter);
2055 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2056 (unsigned long) adapter);
2057
2058 INIT_WORK(&adapter->reset_task, igb_reset_task);
2059 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2060
2061 /* Initialize link properties that are user-changeable */
2062 adapter->fc_autoneg = true;
2063 hw->mac.autoneg = true;
2064 hw->phy.autoneg_advertised = 0x2f;
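/* 0x2f is read here as advertising 10 and 100 Mb/s half and full duplex
 * plus 1000 Mb/s full duplex (the usual e1000 ADVERTISE_* bit layout);
 * treat this decoding as an illustration, not a documented constant. */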
2065
2066 hw->fc.requested_mode = e1000_fc_default;
2067 hw->fc.current_mode = e1000_fc_default;
2068
2069 igb_validate_mdi_setting(hw);
2070
2071 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
2072 * enable the ACPI Magic Packet filter.
2073 */
2074
2075 if (hw->bus.func == 0)
2076 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2077 else if (hw->mac.type >= e1000_82580)
2078 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2079 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2080 &eeprom_data);
2081 else if (hw->bus.func == 1)
2082 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2083
2084 if (eeprom_data & eeprom_apme_mask)
2085 adapter->eeprom_wol |= E1000_WUFC_MAG;
2086
2087 /* now that we have the eeprom settings, apply the special cases where
2088 * the eeprom may be wrong or the board simply won't support wake on
2089 * lan on a particular port */
2090 switch (pdev->device) {
2091 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2092 adapter->eeprom_wol = 0;
2093 break;
2094 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2095 case E1000_DEV_ID_82576_FIBER:
2096 case E1000_DEV_ID_82576_SERDES:
2097 /* Wake events only supported on port A for dual fiber
2098 * regardless of eeprom setting */
2099 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2100 adapter->eeprom_wol = 0;
2101 break;
2102 case E1000_DEV_ID_82576_QUAD_COPPER:
2103 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2104 /* if quad port adapter, disable WoL on all but port A */
2105 if (global_quad_port_a != 0)
2106 adapter->eeprom_wol = 0;
2107 else
2108 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2109 /* Reset for multiple quad port adapters */
2110 if (++global_quad_port_a == 4)
2111 global_quad_port_a = 0;
2112 break;
2113 }
2114
2115 /* initialize the wol settings based on the eeprom settings */
2116 adapter->wol = adapter->eeprom_wol;
2117 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2118
2119 /* reset the hardware with the new settings */
2120 igb_reset(adapter);
2121
2122 /* let the f/w know that the h/w is now under the control of the
2123 * driver. */
2124 igb_get_hw_control(adapter);
2125
2126 strcpy(netdev->name, "eth%d");
2127 err = register_netdev(netdev);
2128 if (err)
2129 goto err_register;
2130
2131 igb_vlan_mode(netdev, netdev->features);
2132
2133 /* carrier off reporting is important to ethtool even BEFORE open */
2134 netif_carrier_off(netdev);
2135
2136 #ifdef CONFIG_IGB_DCA
2137 if (dca_add_requester(&pdev->dev) == 0) {
2138 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2139 dev_info(&pdev->dev, "DCA enabled\n");
2140 igb_setup_dca(adapter);
2141 }
2142
2143 #endif
2144 /* do hw tstamp init after resetting */
2145 igb_init_hw_timer(adapter);
2146
2147 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2148 /* print bus type/speed/width info */
2149 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2150 netdev->name,
2151 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2152 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2153 "unknown"),
2154 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2155 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2156 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2157 "unknown"),
2158 netdev->dev_addr);
2159
2160 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2161 if (ret_val)
2162 strcpy(part_str, "Unknown");
2163 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2164 dev_info(&pdev->dev,
2165 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2166 adapter->msix_entries ? "MSI-X" :
2167 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2168 adapter->num_rx_queues, adapter->num_tx_queues);
2169 switch (hw->mac.type) {
2170 case e1000_i350:
2171 igb_set_eee_i350(hw);
2172 break;
2173 default:
2174 break;
2175 }
2176 return 0;
2177
2178 err_register:
2179 igb_release_hw_control(adapter);
2180 err_eeprom:
2181 if (!igb_check_reset_block(hw))
2182 igb_reset_phy(hw);
2183
2184 if (hw->flash_address)
2185 iounmap(hw->flash_address);
2186 err_sw_init:
2187 igb_clear_interrupt_scheme(adapter);
2188 iounmap(hw->hw_addr);
2189 err_ioremap:
2190 free_netdev(netdev);
2191 err_alloc_etherdev:
2192 pci_release_selected_regions(pdev,
2193 pci_select_bars(pdev, IORESOURCE_MEM));
2194 err_pci_reg:
2195 err_dma:
2196 pci_disable_device(pdev);
2197 return err;
2198 }
2199
2200 /**
2201 * igb_remove - Device Removal Routine
2202 * @pdev: PCI device information struct
2203 *
2204 * igb_remove is called by the PCI subsystem to alert the driver
2205 * that it should release a PCI device. This could be caused by a
2206 * Hot-Plug event, or because the driver is going to be removed from
2207 * memory.
2208 **/
2209 static void __devexit igb_remove(struct pci_dev *pdev)
2210 {
2211 struct net_device *netdev = pci_get_drvdata(pdev);
2212 struct igb_adapter *adapter = netdev_priv(netdev);
2213 struct e1000_hw *hw = &adapter->hw;
2214
2215 /*
2216 * The watchdog timer may be rescheduled, so explicitly
2217 * disable watchdog from being rescheduled.
2218 */
2219 set_bit(__IGB_DOWN, &adapter->state);
2220 del_timer_sync(&adapter->watchdog_timer);
2221 del_timer_sync(&adapter->phy_info_timer);
2222
2223 cancel_work_sync(&adapter->reset_task);
2224 cancel_work_sync(&adapter->watchdog_task);
2225
2226 #ifdef CONFIG_IGB_DCA
2227 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2228 dev_info(&pdev->dev, "DCA disabled\n");
2229 dca_remove_requester(&pdev->dev);
2230 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2231 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2232 }
2233 #endif
2234
2235 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2236 * would have already happened in close and is redundant. */
2237 igb_release_hw_control(adapter);
2238
2239 unregister_netdev(netdev);
2240
2241 igb_clear_interrupt_scheme(adapter);
2242
2243 #ifdef CONFIG_PCI_IOV
2244 /* reclaim resources allocated to VFs */
2245 if (adapter->vf_data) {
2246 /* disable iov and allow time for transactions to clear */
2247 pci_disable_sriov(pdev);
2248 msleep(500);
2249
2250 kfree(adapter->vf_data);
2251 adapter->vf_data = NULL;
2252 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2253 wrfl();
2254 msleep(100);
2255 dev_info(&pdev->dev, "IOV Disabled\n");
2256 }
2257 #endif
2258
2259 iounmap(hw->hw_addr);
2260 if (hw->flash_address)
2261 iounmap(hw->flash_address);
2262 pci_release_selected_regions(pdev,
2263 pci_select_bars(pdev, IORESOURCE_MEM));
2264
2265 free_netdev(netdev);
2266
2267 pci_disable_pcie_error_reporting(pdev);
2268
2269 pci_disable_device(pdev);
2270 }
2271
2272 /**
2273 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2274 * @adapter: board private structure to initialize
2275 *
2276 * This function initializes the vf specific data storage and then attempts to
2277 * allocate the VFs. The reason for ordering it this way is that it is much
2278 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2279 * the memory for the VFs.
2280 **/
2281 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2282 {
2283 #ifdef CONFIG_PCI_IOV
2284 struct pci_dev *pdev = adapter->pdev;
2285
2286 if (adapter->vfs_allocated_count) {
2287 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2288 sizeof(struct vf_data_storage),
2289 GFP_KERNEL);
2290 /* if allocation failed then we do not support SR-IOV */
2291 if (!adapter->vf_data) {
2292 adapter->vfs_allocated_count = 0;
2293 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2294 "Data Storage\n");
2295 }
2296 }
2297
2298 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2299 kfree(adapter->vf_data);
2300 adapter->vf_data = NULL;
2301 #endif /* CONFIG_PCI_IOV */
2302 adapter->vfs_allocated_count = 0;
2303 #ifdef CONFIG_PCI_IOV
2304 } else {
2305 unsigned char mac_addr[ETH_ALEN];
2306 int i;
2307 dev_info(&pdev->dev, "%d vfs allocated\n",
2308 adapter->vfs_allocated_count);
2309 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2310 random_ether_addr(mac_addr);
2311 igb_set_vf_mac(adapter, i, mac_addr);
2312 }
2313 /* DMA Coalescing is not supported in IOV mode. */
2314 if (adapter->flags & IGB_FLAG_DMAC)
2315 adapter->flags &= ~IGB_FLAG_DMAC;
2316 }
2317 #endif /* CONFIG_PCI_IOV */
2318 }
2319
2320
2321 /**
2322 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2323 * @adapter: board private structure to initialize
2324 *
2325 * igb_init_hw_timer initializes the function pointer and values for the hw
2326 * timer found in hardware.
2327 **/
2328 static void igb_init_hw_timer(struct igb_adapter *adapter)
2329 {
2330 struct e1000_hw *hw = &adapter->hw;
2331
2332 switch (hw->mac.type) {
2333 case e1000_i350:
2334 case e1000_82580:
2335 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2336 adapter->cycles.read = igb_read_clock;
2337 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2338 adapter->cycles.mult = 1;
2339 /*
2340 * The 82580 timesync increments the system timer by 8ns every 8ns
2341 * and the value cannot be shifted. Instead we need to shift
2342 * the registers to generate a 64bit timer value. As a result
2343 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2344 * 24 in order to generate a larger value for synchronization.
2345 */
2346 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2347 /* disable system timer temporarily by setting bit 31 */
2348 wr32(E1000_TSAUXC, 0x80000000);
2349 wrfl();
2350
2351 /* Set registers so that rollover occurs soon to test this. */
2352 wr32(E1000_SYSTIMR, 0x00000000);
2353 wr32(E1000_SYSTIML, 0x80000000);
2354 wr32(E1000_SYSTIMH, 0x000000FF);
2355 wrfl();
2356
2357 /* enable system timer by clearing bit 31 */
2358 wr32(E1000_TSAUXC, 0x0);
2359 wrfl();
2360
2361 timecounter_init(&adapter->clock,
2362 &adapter->cycles,
2363 ktime_to_ns(ktime_get_real()));
2364 /*
2365 * Synchronize our NIC clock against system wall clock. NIC
2366 * time stamp reading requires ~3us per sample, each sample
2367 * was pretty stable even under load => only require 10
2368 * samples for each offset comparison.
2369 */
2370 memset(&adapter->compare, 0, sizeof(adapter->compare));
2371 adapter->compare.source = &adapter->clock;
2372 adapter->compare.target = ktime_get_real;
2373 adapter->compare.num_samples = 10;
2374 timecompare_update(&adapter->compare, 0);
2375 break;
2376 case e1000_82576:
2377 /*
2378 * Initialize hardware timer: we keep it running just in case
2379 * that some program needs it later on.
2380 */
2381 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2382 adapter->cycles.read = igb_read_clock;
2383 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2384 adapter->cycles.mult = 1;
2385 /*
2386 * Scale the NIC clock cycle by a large factor so that
2387 * relatively small clock corrections can be added or
2388 * subtracted at each clock tick. The drawbacks of a large
2389 * factor are a) that the clock register overflows more quickly
2390 * (not such a big deal) and b) that the increment per tick has
2391 * to fit into 24 bits. As a result we need to use a shift of
2392 * 19 so we can fit a value of 16 into the TIMINCA register.
2393 */
2394 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2395 wr32(E1000_TIMINCA,
2396 (1 << E1000_TIMINCA_16NS_SHIFT) |
2397 (16 << IGB_82576_TSYNC_SHIFT));
2398
2399 /* Set registers so that rollover occurs soon to test this. */
2400 wr32(E1000_SYSTIML, 0x00000000);
2401 wr32(E1000_SYSTIMH, 0xFF800000);
2402 wrfl();
2403
2404 timecounter_init(&adapter->clock,
2405 &adapter->cycles,
2406 ktime_to_ns(ktime_get_real()));
2407 /*
2408 * Synchronize our NIC clock against system wall clock. NIC
2409 * time stamp reading requires ~3us per sample, each sample
2410 * was pretty stable even under load => only require 10
2411 * samples for each offset comparison.
2412 */
2413 memset(&adapter->compare, 0, sizeof(adapter->compare));
2414 adapter->compare.source = &adapter->clock;
2415 adapter->compare.target = ktime_get_real;
2416 adapter->compare.num_samples = 10;
2417 timecompare_update(&adapter->compare, 0);
2418 break;
2419 case e1000_82575:
2420 /* 82575 does not support timesync */
2421 default:
2422 break;
2423 }
2424
2425 }
2426
2427 /**
2428 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2429 * @adapter: board private structure to initialize
2430 *
2431 * igb_sw_init initializes the Adapter private data structure.
2432 * Fields are initialized based on PCI device information and
2433 * OS network device settings (MTU size).
2434 **/
2435 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2436 {
2437 struct e1000_hw *hw = &adapter->hw;
2438 struct net_device *netdev = adapter->netdev;
2439 struct pci_dev *pdev = adapter->pdev;
2440
2441 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2442
2443 /* set default ring sizes */
2444 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2445 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2446
2447 /* set default ITR values */
2448 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2449 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2450
2451 /* set default work limits */
2452 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2453
2454 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2455 VLAN_HLEN;
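/* e.g. with the default 1500 byte MTU this works out to
 * 1500 + 14 + 4 + 4 = 1522 bytes. */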
2456 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2457
2458 adapter->node = -1;
2459
2460 spin_lock_init(&adapter->stats64_lock);
2461 #ifdef CONFIG_PCI_IOV
2462 switch (hw->mac.type) {
2463 case e1000_82576:
2464 case e1000_i350:
2465 if (max_vfs > 7) {
2466 dev_warn(&pdev->dev,
2467 "Maximum of 7 VFs per PF, using max\n");
2468 adapter->vfs_allocated_count = 7;
2469 } else
2470 adapter->vfs_allocated_count = max_vfs;
2471 break;
2472 default:
2473 break;
2474 }
2475 #endif /* CONFIG_PCI_IOV */
2476 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2477 /* i350 cannot do RSS and SR-IOV at the same time */
2478 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2479 adapter->rss_queues = 1;
2480
2481 /*
2482 * if rss_queues > 4, or if more than one rss queue is in use while
2483 * more than 6 VFs are allocated, then we should combine the queues
2484 * into queue pairs in order to conserve the limited supply of interrupts
2485 */
2486 if ((adapter->rss_queues > 4) ||
2487 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2488 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2489
2490 /* This call may decrease the number of queues */
2491 if (igb_init_interrupt_scheme(adapter)) {
2492 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2493 return -ENOMEM;
2494 }
2495
2496 igb_probe_vfs(adapter);
2497
2498 /* Explicitly disable IRQ since the NIC can be in any state. */
2499 igb_irq_disable(adapter);
2500
2501 if (hw->mac.type == e1000_i350)
2502 adapter->flags &= ~IGB_FLAG_DMAC;
2503
2504 set_bit(__IGB_DOWN, &adapter->state);
2505 return 0;
2506 }
2507
2508 /**
2509 * igb_open - Called when a network interface is made active
2510 * @netdev: network interface device structure
2511 *
2512 * Returns 0 on success, negative value on failure
2513 *
2514 * The open entry point is called when a network interface is made
2515 * active by the system (IFF_UP). At this point all resources needed
2516 * for transmit and receive operations are allocated, the interrupt
2517 * handler is registered with the OS, the watchdog timer is started,
2518 * and the stack is notified that the interface is ready.
2519 **/
2520 static int igb_open(struct net_device *netdev)
2521 {
2522 struct igb_adapter *adapter = netdev_priv(netdev);
2523 struct e1000_hw *hw = &adapter->hw;
2524 int err;
2525 int i;
2526
2527 /* disallow open during test */
2528 if (test_bit(__IGB_TESTING, &adapter->state))
2529 return -EBUSY;
2530
2531 netif_carrier_off(netdev);
2532
2533 /* allocate transmit descriptors */
2534 err = igb_setup_all_tx_resources(adapter);
2535 if (err)
2536 goto err_setup_tx;
2537
2538 /* allocate receive descriptors */
2539 err = igb_setup_all_rx_resources(adapter);
2540 if (err)
2541 goto err_setup_rx;
2542
2543 igb_power_up_link(adapter);
2544
2545 /* before we allocate an interrupt, we must be ready to handle it.
2546 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2547 * as soon as we call pci_request_irq, so we have to setup our
2548 * clean_rx handler before we do so. */
2549 igb_configure(adapter);
2550
2551 err = igb_request_irq(adapter);
2552 if (err)
2553 goto err_req_irq;
2554
2555 /* From here on the code is the same as igb_up() */
2556 clear_bit(__IGB_DOWN, &adapter->state);
2557
2558 for (i = 0; i < adapter->num_q_vectors; i++) {
2559 struct igb_q_vector *q_vector = adapter->q_vector[i];
2560 napi_enable(&q_vector->napi);
2561 }
2562
2563 /* Clear any pending interrupts. */
2564 rd32(E1000_ICR);
2565
2566 igb_irq_enable(adapter);
2567
2568 /* notify VFs that reset has been completed */
2569 if (adapter->vfs_allocated_count) {
2570 u32 reg_data = rd32(E1000_CTRL_EXT);
2571 reg_data |= E1000_CTRL_EXT_PFRSTD;
2572 wr32(E1000_CTRL_EXT, reg_data);
2573 }
2574
2575 netif_tx_start_all_queues(netdev);
2576
2577 /* start the watchdog. */
2578 hw->mac.get_link_status = 1;
2579 schedule_work(&adapter->watchdog_task);
2580
2581 return 0;
2582
2583 err_req_irq:
2584 igb_release_hw_control(adapter);
2585 igb_power_down_link(adapter);
2586 igb_free_all_rx_resources(adapter);
2587 err_setup_rx:
2588 igb_free_all_tx_resources(adapter);
2589 err_setup_tx:
2590 igb_reset(adapter);
2591
2592 return err;
2593 }
2594
2595 /**
2596 * igb_close - Disables a network interface
2597 * @netdev: network interface device structure
2598 *
2599 * Returns 0, this is not allowed to fail
2600 *
2601 * The close entry point is called when an interface is de-activated
2602 * by the OS. The hardware is still under the driver's control, but
2603 * needs to be disabled. A global MAC reset is issued to stop the
2604 * hardware, and all transmit and receive resources are freed.
2605 **/
2606 static int igb_close(struct net_device *netdev)
2607 {
2608 struct igb_adapter *adapter = netdev_priv(netdev);
2609
2610 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2611 igb_down(adapter);
2612
2613 igb_free_irq(adapter);
2614
2615 igb_free_all_tx_resources(adapter);
2616 igb_free_all_rx_resources(adapter);
2617
2618 return 0;
2619 }
2620
2621 /**
2622 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2623 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2624 *
2625 * Return 0 on success, negative on failure
2626 **/
2627 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2628 {
2629 struct device *dev = tx_ring->dev;
2630 int orig_node = dev_to_node(dev);
2631 int size;
2632
2633 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2634 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2635 if (!tx_ring->tx_buffer_info)
2636 tx_ring->tx_buffer_info = vzalloc(size);
2637 if (!tx_ring->tx_buffer_info)
2638 goto err;
2639
2640 /* round up to nearest 4K */
2641 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2642 tx_ring->size = ALIGN(tx_ring->size, 4096);
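/* For example, assuming 16 byte advanced Tx descriptors, a ring of 256
 * descriptors is exactly 4096 bytes, while 320 descriptors (5120 bytes)
 * would be rounded up to 8192 by the ALIGN above. */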
2643
2644 set_dev_node(dev, tx_ring->numa_node);
2645 tx_ring->desc = dma_alloc_coherent(dev,
2646 tx_ring->size,
2647 &tx_ring->dma,
2648 GFP_KERNEL);
2649 set_dev_node(dev, orig_node);
2650 if (!tx_ring->desc)
2651 tx_ring->desc = dma_alloc_coherent(dev,
2652 tx_ring->size,
2653 &tx_ring->dma,
2654 GFP_KERNEL);
2655
2656 if (!tx_ring->desc)
2657 goto err;
2658
2659 tx_ring->next_to_use = 0;
2660 tx_ring->next_to_clean = 0;
2661
2662 return 0;
2663
2664 err:
2665 vfree(tx_ring->tx_buffer_info);
2666 dev_err(dev,
2667 "Unable to allocate memory for the transmit descriptor ring\n");
2668 return -ENOMEM;
2669 }
2670
2671 /**
2672 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2673 * (Descriptors) for all queues
2674 * @adapter: board private structure
2675 *
2676 * Return 0 on success, negative on failure
2677 **/
2678 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2679 {
2680 struct pci_dev *pdev = adapter->pdev;
2681 int i, err = 0;
2682
2683 for (i = 0; i < adapter->num_tx_queues; i++) {
2684 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2685 if (err) {
2686 dev_err(&pdev->dev,
2687 "Allocation for Tx Queue %u failed\n", i);
2688 for (i--; i >= 0; i--)
2689 igb_free_tx_resources(adapter->tx_ring[i]);
2690 break;
2691 }
2692 }
2693
2694 return err;
2695 }
2696
2697 /**
2698 * igb_setup_tctl - configure the transmit control registers
2699 * @adapter: Board private structure
2700 **/
2701 void igb_setup_tctl(struct igb_adapter *adapter)
2702 {
2703 struct e1000_hw *hw = &adapter->hw;
2704 u32 tctl;
2705
2706 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2707 wr32(E1000_TXDCTL(0), 0);
2708
2709 /* Program the Transmit Control Register */
2710 tctl = rd32(E1000_TCTL);
2711 tctl &= ~E1000_TCTL_CT;
2712 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2713 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2714
2715 igb_config_collision_dist(hw);
2716
2717 /* Enable transmits */
2718 tctl |= E1000_TCTL_EN;
2719
2720 wr32(E1000_TCTL, tctl);
2721 }
2722
2723 /**
2724 * igb_configure_tx_ring - Configure transmit ring after Reset
2725 * @adapter: board private structure
2726 * @ring: tx ring to configure
2727 *
2728 * Configure a transmit ring after a reset.
2729 **/
2730 void igb_configure_tx_ring(struct igb_adapter *adapter,
2731 struct igb_ring *ring)
2732 {
2733 struct e1000_hw *hw = &adapter->hw;
2734 u32 txdctl = 0;
2735 u64 tdba = ring->dma;
2736 int reg_idx = ring->reg_idx;
2737
2738 /* disable the queue */
2739 wr32(E1000_TXDCTL(reg_idx), 0);
2740 wrfl();
2741 mdelay(10);
2742
2743 wr32(E1000_TDLEN(reg_idx),
2744 ring->count * sizeof(union e1000_adv_tx_desc));
2745 wr32(E1000_TDBAL(reg_idx),
2746 tdba & 0x00000000ffffffffULL);
2747 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2748
2749 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2750 wr32(E1000_TDH(reg_idx), 0);
2751 writel(0, ring->tail);
2752
2753 txdctl |= IGB_TX_PTHRESH;
2754 txdctl |= IGB_TX_HTHRESH << 8;
2755 txdctl |= IGB_TX_WTHRESH << 16;
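/* As the shifts above indicate, PTHRESH sits in the low bits of TXDCTL,
 * HTHRESH starts at bit 8 and WTHRESH at bit 16. */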
2756
2757 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2758 wr32(E1000_TXDCTL(reg_idx), txdctl);
2759 }
2760
2761 /**
2762 * igb_configure_tx - Configure transmit Unit after Reset
2763 * @adapter: board private structure
2764 *
2765 * Configure the Tx unit of the MAC after a reset.
2766 **/
2767 static void igb_configure_tx(struct igb_adapter *adapter)
2768 {
2769 int i;
2770
2771 for (i = 0; i < adapter->num_tx_queues; i++)
2772 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2773 }
2774
2775 /**
2776 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2777 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2778 *
2779 * Returns 0 on success, negative on failure
2780 **/
2781 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2782 {
2783 struct device *dev = rx_ring->dev;
2784 int orig_node = dev_to_node(dev);
2785 int size, desc_len;
2786
2787 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2788 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2789 if (!rx_ring->rx_buffer_info)
2790 rx_ring->rx_buffer_info = vzalloc(size);
2791 if (!rx_ring->rx_buffer_info)
2792 goto err;
2793
2794 desc_len = sizeof(union e1000_adv_rx_desc);
2795
2796 /* Round up to nearest 4K */
2797 rx_ring->size = rx_ring->count * desc_len;
2798 rx_ring->size = ALIGN(rx_ring->size, 4096);
2799
2800 set_dev_node(dev, rx_ring->numa_node);
2801 rx_ring->desc = dma_alloc_coherent(dev,
2802 rx_ring->size,
2803 &rx_ring->dma,
2804 GFP_KERNEL);
2805 set_dev_node(dev, orig_node);
2806 if (!rx_ring->desc)
2807 rx_ring->desc = dma_alloc_coherent(dev,
2808 rx_ring->size,
2809 &rx_ring->dma,
2810 GFP_KERNEL);
2811
2812 if (!rx_ring->desc)
2813 goto err;
2814
2815 rx_ring->next_to_clean = 0;
2816 rx_ring->next_to_use = 0;
2817
2818 return 0;
2819
2820 err:
2821 vfree(rx_ring->rx_buffer_info);
2822 rx_ring->rx_buffer_info = NULL;
2823 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2824 " ring\n");
2825 return -ENOMEM;
2826 }
2827
2828 /**
2829 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2830 * (Descriptors) for all queues
2831 * @adapter: board private structure
2832 *
2833 * Return 0 on success, negative on failure
2834 **/
2835 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2836 {
2837 struct pci_dev *pdev = adapter->pdev;
2838 int i, err = 0;
2839
2840 for (i = 0; i < adapter->num_rx_queues; i++) {
2841 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2842 if (err) {
2843 dev_err(&pdev->dev,
2844 "Allocation for Rx Queue %u failed\n", i);
2845 for (i--; i >= 0; i--)
2846 igb_free_rx_resources(adapter->rx_ring[i]);
2847 break;
2848 }
2849 }
2850
2851 return err;
2852 }
2853
2854 /**
2855 * igb_setup_mrqc - configure the multiple receive queue control registers
2856 * @adapter: Board private structure
2857 **/
2858 static void igb_setup_mrqc(struct igb_adapter *adapter)
2859 {
2860 struct e1000_hw *hw = &adapter->hw;
2861 u32 mrqc, rxcsum;
2862 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2863 union e1000_reta {
2864 u32 dword;
2865 u8 bytes[4];
2866 } reta;
2867 static const u8 rsshash[40] = {
2868 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2869 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2870 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2871 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2872
2873 /* Fill out hash function seeds */
2874 for (j = 0; j < 10; j++) {
2875 u32 rsskey = rsshash[(j * 4)];
2876 rsskey |= rsshash[(j * 4) + 1] << 8;
2877 rsskey |= rsshash[(j * 4) + 2] << 16;
2878 rsskey |= rsshash[(j * 4) + 3] << 24;
2879 array_wr32(E1000_RSSRK(0), j, rsskey);
2880 }
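/* The loop above packs the 40 byte RSS key little-endian into the ten
 * 32 bit RSSRK registers: rsshash[4 * j] becomes the low byte of key
 * word j and rsshash[4 * j + 3] the high byte. */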
2881
2882 num_rx_queues = adapter->rss_queues;
2883
2884 if (adapter->vfs_allocated_count) {
2885 /* 82575 and 82576 support 2 RSS queues for VMDq */
2886 switch (hw->mac.type) {
2887 case e1000_i350:
2888 case e1000_82580:
2889 num_rx_queues = 1;
2890 shift = 0;
2891 break;
2892 case e1000_82576:
2893 shift = 3;
2894 num_rx_queues = 2;
2895 break;
2896 case e1000_82575:
2897 shift = 2;
2898 shift2 = 6;
2899 default:
2900 break;
2901 }
2902 } else {
2903 if (hw->mac.type == e1000_82575)
2904 shift = 6;
2905 }
2906
2907 for (j = 0; j < (32 * 4); j++) {
2908 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2909 if (shift2)
2910 reta.bytes[j & 3] |= num_rx_queues << shift2;
2911 if ((j & 3) == 3)
2912 wr32(E1000_RETA(j >> 2), reta.dword);
2913 }
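/* Example of the RETA fill above: with num_rx_queues = 4 and shift = 0
 * the table bytes cycle 0, 1, 2, 3, 0, 1, ... and every fourth byte
 * completes a dword that is written to RETA(j >> 2). */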
2914
2915 /*
2916 * Disable raw packet checksumming so that RSS hash is placed in
2917 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2918 * offloads as they are enabled by default
2919 */
2920 rxcsum = rd32(E1000_RXCSUM);
2921 rxcsum |= E1000_RXCSUM_PCSD;
2922
2923 if (adapter->hw.mac.type >= e1000_82576)
2924 /* Enable Receive Checksum Offload for SCTP */
2925 rxcsum |= E1000_RXCSUM_CRCOFL;
2926
2927 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2928 wr32(E1000_RXCSUM, rxcsum);
2929
2930 /* If VMDq is enabled then we set the appropriate mode for that, else
2931 * we default to RSS so that an RSS hash is calculated per packet even
2932 * if we are only using one queue */
2933 if (adapter->vfs_allocated_count) {
2934 if (hw->mac.type > e1000_82575) {
2935 /* Set the default pool for the PF's first queue */
2936 u32 vtctl = rd32(E1000_VT_CTL);
2937 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2938 E1000_VT_CTL_DISABLE_DEF_POOL);
2939 vtctl |= adapter->vfs_allocated_count <<
2940 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2941 wr32(E1000_VT_CTL, vtctl);
2942 }
2943 if (adapter->rss_queues > 1)
2944 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2945 else
2946 mrqc = E1000_MRQC_ENABLE_VMDQ;
2947 } else {
2948 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2949 }
2950 igb_vmm_control(adapter);
2951
2952 /*
2953 * Generate RSS hash based on TCP port numbers and/or
2954 * IPv4/v6 src and dst addresses since UDP cannot be
2955 * hashed reliably due to IP fragmentation
2956 */
2957 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2958 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2959 E1000_MRQC_RSS_FIELD_IPV6 |
2960 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2961 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2962
2963 wr32(E1000_MRQC, mrqc);
2964 }
2965
2966 /**
2967 * igb_setup_rctl - configure the receive control registers
2968 * @adapter: Board private structure
2969 **/
2970 void igb_setup_rctl(struct igb_adapter *adapter)
2971 {
2972 struct e1000_hw *hw = &adapter->hw;
2973 u32 rctl;
2974
2975 rctl = rd32(E1000_RCTL);
2976
2977 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2978 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2979
2980 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2981 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2982
2983 /*
2984 * enable stripping of CRC. It's unlikely this will break BMC
2985 * redirection as it did with e1000. Newer features require
2986 * that the HW strips the CRC.
2987 */
2988 rctl |= E1000_RCTL_SECRC;
2989
2990 /* disable store bad packets and clear size bits. */
2991 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2992
2993 /* enable LPE to prevent packets larger than max_frame_size */
2994 rctl |= E1000_RCTL_LPE;
2995
2996 /* disable queue 0 to prevent tail write w/o re-config */
2997 wr32(E1000_RXDCTL(0), 0);
2998
2999 /* Attention: for SR-IOV PF driver operations you must enable
3000 * queue drop for all VF and PF queues to prevent head-of-line blocking
3001 * if an untrusted VF does not provide descriptors to hardware.
3002 */
3003 if (adapter->vfs_allocated_count) {
3004 /* set all queue drop enable bits */
3005 wr32(E1000_QDE, ALL_QUEUES);
3006 }
3007
3008 wr32(E1000_RCTL, rctl);
3009 }
3010
3011 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3012 int vfn)
3013 {
3014 struct e1000_hw *hw = &adapter->hw;
3015 u32 vmolr;
3016
3017 /* if it isn't the PF, check to see if VFs are enabled and
3018 * increase the size to support VLAN tags */
3019 if (vfn < adapter->vfs_allocated_count &&
3020 adapter->vf_data[vfn].vlans_enabled)
3021 size += VLAN_TAG_SIZE;
3022
3023 vmolr = rd32(E1000_VMOLR(vfn));
3024 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3025 vmolr |= size | E1000_VMOLR_LPE;
3026 wr32(E1000_VMOLR(vfn), vmolr);
3027
3028 return 0;
3029 }
3030
3031 /**
3032 * igb_rlpml_set - set maximum receive packet size
3033 * @adapter: board private structure
3034 *
3035 * Configure maximum receivable packet size.
3036 **/
3037 static void igb_rlpml_set(struct igb_adapter *adapter)
3038 {
3039 u32 max_frame_size = adapter->max_frame_size;
3040 struct e1000_hw *hw = &adapter->hw;
3041 u16 pf_id = adapter->vfs_allocated_count;
3042
3043 if (pf_id) {
3044 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3045 /*
3046 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3047 * to our max jumbo frame size, in case we need to enable
3048 * jumbo frames on one of the rings later.
3049 * This will not pass over-length frames into the default
3050 * queue because it's gated by the VMOLR.RLPML.
3051 */
3052 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3053 }
3054
3055 wr32(E1000_RLPML, max_frame_size);
3056 }
3057
3058 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3059 int vfn, bool aupe)
3060 {
3061 struct e1000_hw *hw = &adapter->hw;
3062 u32 vmolr;
3063
3064 /*
3065 * This register exists only on 82576 and newer, so on older MACs
3066 * we should exit and do nothing
3067 */
3068 if (hw->mac.type < e1000_82576)
3069 return;
3070
3071 vmolr = rd32(E1000_VMOLR(vfn));
3072 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3073 if (aupe)
3074 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3075 else
3076 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3077
3078 /* clear all bits that might not be set */
3079 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3080
3081 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3082 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3083 /*
3084 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3085 * multicast packets
3086 */
3087 if (vfn <= adapter->vfs_allocated_count)
3088 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3089
3090 wr32(E1000_VMOLR(vfn), vmolr);
3091 }
3092
3093 /**
3094 * igb_configure_rx_ring - Configure a receive ring after Reset
3095 * @adapter: board private structure
3096 * @ring: receive ring to be configured
3097 *
3098 * Configure the Rx unit of the MAC after a reset.
3099 **/
3100 void igb_configure_rx_ring(struct igb_adapter *adapter,
3101 struct igb_ring *ring)
3102 {
3103 struct e1000_hw *hw = &adapter->hw;
3104 u64 rdba = ring->dma;
3105 int reg_idx = ring->reg_idx;
3106 u32 srrctl = 0, rxdctl = 0;
3107
3108 /* disable the queue */
3109 wr32(E1000_RXDCTL(reg_idx), 0);
3110
3111 /* Set DMA base address registers */
3112 wr32(E1000_RDBAL(reg_idx),
3113 rdba & 0x00000000ffffffffULL);
3114 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3115 wr32(E1000_RDLEN(reg_idx),
3116 ring->count * sizeof(union e1000_adv_rx_desc));
3117
3118 /* initialize head and tail */
3119 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3120 wr32(E1000_RDH(reg_idx), 0);
3121 writel(0, ring->tail);
3122
3123 /* set descriptor configuration */
3124 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3125 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3126 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3127 #else
3128 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3129 #endif
3130 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3131 if (hw->mac.type == e1000_82580)
3132 srrctl |= E1000_SRRCTL_TIMESTAMP;
3133 /* Only set Drop Enable if we are supporting multiple queues */
3134 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3135 srrctl |= E1000_SRRCTL_DROP_EN;
3136
3137 wr32(E1000_SRRCTL(reg_idx), srrctl);
3138
3139 /* set filtering for VMDQ pools */
3140 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3141
3142 rxdctl |= IGB_RX_PTHRESH;
3143 rxdctl |= IGB_RX_HTHRESH << 8;
3144 rxdctl |= IGB_RX_WTHRESH << 16;
3145
3146 /* enable receive descriptor fetching */
3147 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3148 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3149 }
3150
3151 /**
3152 * igb_configure_rx - Configure receive Unit after Reset
3153 * @adapter: board private structure
3154 *
3155 * Configure the Rx unit of the MAC after a reset.
3156 **/
3157 static void igb_configure_rx(struct igb_adapter *adapter)
3158 {
3159 int i;
3160
3161 /* set UTA to appropriate mode */
3162 igb_set_uta(adapter);
3163
3164 /* set the correct pool for the PF default MAC address in entry 0 */
3165 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3166 adapter->vfs_allocated_count);
3167
3168 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3169 * the Base and Length of the Rx Descriptor Ring */
3170 for (i = 0; i < adapter->num_rx_queues; i++)
3171 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3172 }
3173
3174 /**
3175 * igb_free_tx_resources - Free Tx Resources per Queue
3176 * @tx_ring: Tx descriptor ring for a specific queue
3177 *
3178 * Free all transmit software resources
3179 **/
3180 void igb_free_tx_resources(struct igb_ring *tx_ring)
3181 {
3182 igb_clean_tx_ring(tx_ring);
3183
3184 vfree(tx_ring->tx_buffer_info);
3185 tx_ring->tx_buffer_info = NULL;
3186
3187 /* if not set, then don't free */
3188 if (!tx_ring->desc)
3189 return;
3190
3191 dma_free_coherent(tx_ring->dev, tx_ring->size,
3192 tx_ring->desc, tx_ring->dma);
3193
3194 tx_ring->desc = NULL;
3195 }
3196
3197 /**
3198 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3199 * @adapter: board private structure
3200 *
3201 * Free all transmit software resources
3202 **/
3203 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3204 {
3205 int i;
3206
3207 for (i = 0; i < adapter->num_tx_queues; i++)
3208 igb_free_tx_resources(adapter->tx_ring[i]);
3209 }
3210
3211 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3212 struct igb_tx_buffer *tx_buffer)
3213 {
3214 if (tx_buffer->skb) {
3215 dev_kfree_skb_any(tx_buffer->skb);
3216 if (tx_buffer->dma)
3217 dma_unmap_single(ring->dev,
3218 tx_buffer->dma,
3219 tx_buffer->length,
3220 DMA_TO_DEVICE);
3221 } else if (tx_buffer->dma) {
3222 dma_unmap_page(ring->dev,
3223 tx_buffer->dma,
3224 tx_buffer->length,
3225 DMA_TO_DEVICE);
3226 }
3227 tx_buffer->next_to_watch = NULL;
3228 tx_buffer->skb = NULL;
3229 tx_buffer->dma = 0;
3230 /* buffer_info must be completely set up in the transmit path */
3231 }
3232
3233 /**
3234 * igb_clean_tx_ring - Free Tx Buffers
3235 * @tx_ring: ring to be cleaned
3236 **/
3237 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3238 {
3239 struct igb_tx_buffer *buffer_info;
3240 unsigned long size;
3241 u16 i;
3242
3243 if (!tx_ring->tx_buffer_info)
3244 return;
3245 /* Free all the Tx ring sk_buffs */
3246
3247 for (i = 0; i < tx_ring->count; i++) {
3248 buffer_info = &tx_ring->tx_buffer_info[i];
3249 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3250 }
3251
3252 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3253 memset(tx_ring->tx_buffer_info, 0, size);
3254
3255 /* Zero out the descriptor ring */
3256 memset(tx_ring->desc, 0, tx_ring->size);
3257
3258 tx_ring->next_to_use = 0;
3259 tx_ring->next_to_clean = 0;
3260 }
3261
3262 /**
3263 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3264 * @adapter: board private structure
3265 **/
3266 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3267 {
3268 int i;
3269
3270 for (i = 0; i < adapter->num_tx_queues; i++)
3271 igb_clean_tx_ring(adapter->tx_ring[i]);
3272 }
3273
3274 /**
3275 * igb_free_rx_resources - Free Rx Resources
3276 * @rx_ring: ring to clean the resources from
3277 *
3278 * Free all receive software resources
3279 **/
3280 void igb_free_rx_resources(struct igb_ring *rx_ring)
3281 {
3282 igb_clean_rx_ring(rx_ring);
3283
3284 vfree(rx_ring->rx_buffer_info);
3285 rx_ring->rx_buffer_info = NULL;
3286
3287 /* if not set, then don't free */
3288 if (!rx_ring->desc)
3289 return;
3290
3291 dma_free_coherent(rx_ring->dev, rx_ring->size,
3292 rx_ring->desc, rx_ring->dma);
3293
3294 rx_ring->desc = NULL;
3295 }
3296
3297 /**
3298 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3299 * @adapter: board private structure
3300 *
3301 * Free all receive software resources
3302 **/
3303 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3304 {
3305 int i;
3306
3307 for (i = 0; i < adapter->num_rx_queues; i++)
3308 igb_free_rx_resources(adapter->rx_ring[i]);
3309 }
3310
3311 /**
3312 * igb_clean_rx_ring - Free Rx Buffers per Queue
3313 * @rx_ring: ring to free buffers from
3314 **/
3315 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3316 {
3317 unsigned long size;
3318 u16 i;
3319
3320 if (!rx_ring->rx_buffer_info)
3321 return;
3322
3323 /* Free all the Rx ring sk_buffs */
3324 for (i = 0; i < rx_ring->count; i++) {
3325 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3326 if (buffer_info->dma) {
3327 dma_unmap_single(rx_ring->dev,
3328 buffer_info->dma,
3329 IGB_RX_HDR_LEN,
3330 DMA_FROM_DEVICE);
3331 buffer_info->dma = 0;
3332 }
3333
3334 if (buffer_info->skb) {
3335 dev_kfree_skb(buffer_info->skb);
3336 buffer_info->skb = NULL;
3337 }
3338 if (buffer_info->page_dma) {
3339 dma_unmap_page(rx_ring->dev,
3340 buffer_info->page_dma,
3341 PAGE_SIZE / 2,
3342 DMA_FROM_DEVICE);
3343 buffer_info->page_dma = 0;
3344 }
3345 if (buffer_info->page) {
3346 put_page(buffer_info->page);
3347 buffer_info->page = NULL;
3348 buffer_info->page_offset = 0;
3349 }
3350 }
3351
3352 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3353 memset(rx_ring->rx_buffer_info, 0, size);
3354
3355 /* Zero out the descriptor ring */
3356 memset(rx_ring->desc, 0, rx_ring->size);
3357
3358 rx_ring->next_to_clean = 0;
3359 rx_ring->next_to_use = 0;
3360 }
3361
3362 /**
3363 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3364 * @adapter: board private structure
3365 **/
3366 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3367 {
3368 int i;
3369
3370 for (i = 0; i < adapter->num_rx_queues; i++)
3371 igb_clean_rx_ring(adapter->rx_ring[i]);
3372 }
3373
3374 /**
3375 * igb_set_mac - Change the Ethernet Address of the NIC
3376 * @netdev: network interface device structure
3377 * @p: pointer to an address structure
3378 *
3379 * Returns 0 on success, negative on failure
3380 **/
3381 static int igb_set_mac(struct net_device *netdev, void *p)
3382 {
3383 struct igb_adapter *adapter = netdev_priv(netdev);
3384 struct e1000_hw *hw = &adapter->hw;
3385 struct sockaddr *addr = p;
3386
3387 if (!is_valid_ether_addr(addr->sa_data))
3388 return -EADDRNOTAVAIL;
3389
3390 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3391 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3392
3393 /* set the correct pool for the new PF MAC address in entry 0 */
3394 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3395 adapter->vfs_allocated_count);
3396
3397 return 0;
3398 }
3399
3400 /**
3401 * igb_write_mc_addr_list - write multicast addresses to MTA
3402 * @netdev: network interface device structure
3403 *
3404 * Writes multicast address list to the MTA hash table.
3405 * Returns: -ENOMEM on failure
3406 * 0 on no addresses written
3407 * X on writing X addresses to MTA
3408 **/
3409 static int igb_write_mc_addr_list(struct net_device *netdev)
3410 {
3411 struct igb_adapter *adapter = netdev_priv(netdev);
3412 struct e1000_hw *hw = &adapter->hw;
3413 struct netdev_hw_addr *ha;
3414 u8 *mta_list;
3415 int i;
3416
3417 if (netdev_mc_empty(netdev)) {
3418 /* nothing to program, so clear mc list */
3419 igb_update_mc_addr_list(hw, NULL, 0);
3420 igb_restore_vf_multicasts(adapter);
3421 return 0;
3422 }
3423
3424 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3425 if (!mta_list)
3426 return -ENOMEM;
3427
3428 /* The shared function expects a packed array of only addresses. */
3429 i = 0;
3430 netdev_for_each_mc_addr(ha, netdev)
3431 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3432
3433 igb_update_mc_addr_list(hw, mta_list, i);
3434 kfree(mta_list);
3435
3436 return netdev_mc_count(netdev);
3437 }
3438
3439 /**
3440 * igb_write_uc_addr_list - write unicast addresses to RAR table
3441 * @netdev: network interface device structure
3442 *
3443 * Writes unicast address list to the RAR table.
3444 * Returns: -ENOMEM on failure/insufficient address space
3445 * 0 on no addresses written
3446 * X on writing X addresses to the RAR table
3447 **/
3448 static int igb_write_uc_addr_list(struct net_device *netdev)
3449 {
3450 struct igb_adapter *adapter = netdev_priv(netdev);
3451 struct e1000_hw *hw = &adapter->hw;
3452 unsigned int vfn = adapter->vfs_allocated_count;
3453 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3454 int count = 0;
3455
3456 /* return ENOMEM indicating insufficient memory for addresses */
3457 if (netdev_uc_count(netdev) > rar_entries)
3458 return -ENOMEM;
3459
3460 if (!netdev_uc_empty(netdev) && rar_entries) {
3461 struct netdev_hw_addr *ha;
3462
3463 netdev_for_each_uc_addr(ha, netdev) {
3464 if (!rar_entries)
3465 break;
3466 igb_rar_set_qsel(adapter, ha->addr,
3467 rar_entries--,
3468 vfn);
3469 count++;
3470 }
3471 }
3472 /* write the addresses in reverse order to avoid write combining */
3473 for (; rar_entries > 0 ; rar_entries--) {
3474 wr32(E1000_RAH(rar_entries), 0);
3475 wr32(E1000_RAL(rar_entries), 0);
3476 }
3477 wrfl();
3478
3479 return count;
3480 }
3481
3482 /**
3483 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3484 * @netdev: network interface device structure
3485 *
3486 * The set_rx_mode entry point is called whenever the unicast or multicast
3487 * address lists or the network interface flags are updated. This routine is
3488 * responsible for configuring the hardware for proper unicast, multicast,
3489 * promiscuous mode, and all-multi behavior.
3490 **/
3491 static void igb_set_rx_mode(struct net_device *netdev)
3492 {
3493 struct igb_adapter *adapter = netdev_priv(netdev);
3494 struct e1000_hw *hw = &adapter->hw;
3495 unsigned int vfn = adapter->vfs_allocated_count;
3496 u32 rctl, vmolr = 0;
3497 int count;
3498
3499 /* Check for Promiscuous and All Multicast modes */
3500 rctl = rd32(E1000_RCTL);
3501
3502 /* clear the affected bits */
3503 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3504
3505 if (netdev->flags & IFF_PROMISC) {
3506 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3507 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3508 } else {
3509 if (netdev->flags & IFF_ALLMULTI) {
3510 rctl |= E1000_RCTL_MPE;
3511 vmolr |= E1000_VMOLR_MPME;
3512 } else {
3513 /*
3514 * Write addresses to the MTA, if the attempt fails
3515 * then we should just turn on promiscuous mode so
3516 * that we can at least receive multicast traffic
3517 */
3518 count = igb_write_mc_addr_list(netdev);
3519 if (count < 0) {
3520 rctl |= E1000_RCTL_MPE;
3521 vmolr |= E1000_VMOLR_MPME;
3522 } else if (count) {
3523 vmolr |= E1000_VMOLR_ROMPE;
3524 }
3525 }
3526 /*
3527 * Write addresses to available RAR registers, if there is not
3528 * sufficient space to store all the addresses then enable
3529 * unicast promiscuous mode
3530 */
3531 count = igb_write_uc_addr_list(netdev);
3532 if (count < 0) {
3533 rctl |= E1000_RCTL_UPE;
3534 vmolr |= E1000_VMOLR_ROPE;
3535 }
3536 rctl |= E1000_RCTL_VFE;
3537 }
3538 wr32(E1000_RCTL, rctl);
3539
3540 /*
3541 * In order to support SR-IOV and eventually VMDq it is necessary to set
3542 * the VMOLR to enable the appropriate modes. Without this workaround
3543 * we will have issues with VLAN tag stripping not being done for frames
3544 * that are only arriving because we are the default pool
3545 */
3546 if (hw->mac.type < e1000_82576)
3547 return;
3548
3549 vmolr |= rd32(E1000_VMOLR(vfn)) &
3550 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3551 wr32(E1000_VMOLR(vfn), vmolr);
3552 igb_restore_vf_multicasts(adapter);
3553 }
3554
3555 static void igb_check_wvbr(struct igb_adapter *adapter)
3556 {
3557 struct e1000_hw *hw = &adapter->hw;
3558 u32 wvbr = 0;
3559
3560 switch (hw->mac.type) {
3561 case e1000_82576:
3562 case e1000_i350:
3563 if (!(wvbr = rd32(E1000_WVBR)))
3564 return;
3565 break;
3566 default:
3567 break;
3568 }
3569
3570 adapter->wvbr |= wvbr;
3571 }
3572
3573 #define IGB_STAGGERED_QUEUE_OFFSET 8
3574
3575 static void igb_spoof_check(struct igb_adapter *adapter)
3576 {
3577 int j;
3578
3579 if (!adapter->wvbr)
3580 return;
3581
3582 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3583 if (adapter->wvbr & (1 << j) ||
3584 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3585 dev_warn(&adapter->pdev->dev,
3586 "Spoof event(s) detected on VF %d\n", j);
3587 adapter->wvbr &=
3588 ~((1 << j) |
3589 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3590 }
3591 }
3592 }
3593
3594 /* Need to wait a few seconds after link up to get diagnostic information from
3595 * the phy */
3596 static void igb_update_phy_info(unsigned long data)
3597 {
3598 struct igb_adapter *adapter = (struct igb_adapter *) data;
3599 igb_get_phy_info(&adapter->hw);
3600 }
3601
3602 /**
3603 * igb_has_link - check shared code for link and determine up/down
3604 * @adapter: pointer to driver private info
3605 **/
3606 bool igb_has_link(struct igb_adapter *adapter)
3607 {
3608 struct e1000_hw *hw = &adapter->hw;
3609 bool link_active = false;
3610 s32 ret_val = 0;
3611
3612 /* get_link_status is set on LSC (link status) interrupt or
3613 * rx sequence error interrupt. link_active will stay
3614 * false until e1000_check_for_link establishes link
3615 * for copper adapters ONLY
3616 */
3617 switch (hw->phy.media_type) {
3618 case e1000_media_type_copper:
3619 if (hw->mac.get_link_status) {
3620 ret_val = hw->mac.ops.check_for_link(hw);
3621 link_active = !hw->mac.get_link_status;
3622 } else {
3623 link_active = true;
3624 }
3625 break;
3626 case e1000_media_type_internal_serdes:
3627 ret_val = hw->mac.ops.check_for_link(hw);
3628 link_active = hw->mac.serdes_has_link;
3629 break;
3630 default:
3631 case e1000_media_type_unknown:
3632 break;
3633 }
3634
3635 return link_active;
3636 }
3637
3638 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3639 {
3640 bool ret = false;
3641 u32 ctrl_ext, thstat;
3642
3643 /* check for thermal sensor event on i350, copper only */
3644 if (hw->mac.type == e1000_i350) {
3645 thstat = rd32(E1000_THSTAT);
3646 ctrl_ext = rd32(E1000_CTRL_EXT);
3647
3648 if ((hw->phy.media_type == e1000_media_type_copper) &&
3649 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3650 ret = !!(thstat & event);
3651 }
3652 }
3653
3654 return ret;
3655 }
3656
3657 /**
3658 * igb_watchdog - Timer Call-back
3659 * @data: pointer to adapter cast into an unsigned long
3660 **/
3661 static void igb_watchdog(unsigned long data)
3662 {
3663 struct igb_adapter *adapter = (struct igb_adapter *)data;
3664 /* Do the rest outside of interrupt context */
3665 schedule_work(&adapter->watchdog_task);
3666 }
3667
3668 static void igb_watchdog_task(struct work_struct *work)
3669 {
3670 struct igb_adapter *adapter = container_of(work,
3671 struct igb_adapter,
3672 watchdog_task);
3673 struct e1000_hw *hw = &adapter->hw;
3674 struct net_device *netdev = adapter->netdev;
3675 u32 link;
3676 int i;
3677
3678 link = igb_has_link(adapter);
3679 if (link) {
3680 if (!netif_carrier_ok(netdev)) {
3681 u32 ctrl;
3682 hw->mac.ops.get_speed_and_duplex(hw,
3683 &adapter->link_speed,
3684 &adapter->link_duplex);
3685
3686 ctrl = rd32(E1000_CTRL);
3687 			/* Link status message must follow this format */
3688 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3689 "Flow Control: %s\n",
3690 netdev->name,
3691 adapter->link_speed,
3692 adapter->link_duplex == FULL_DUPLEX ?
3693 "Full Duplex" : "Half Duplex",
3694 ((ctrl & E1000_CTRL_TFCE) &&
3695 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3696 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3697 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3698
3699 /* check for thermal sensor event */
3700 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3701 printk(KERN_INFO "igb: %s The network adapter "
3702 "link speed was downshifted "
3703 "because it overheated.\n",
3704 netdev->name);
3705 }
3706
3707 /* adjust timeout factor according to speed/duplex */
3708 adapter->tx_timeout_factor = 1;
3709 switch (adapter->link_speed) {
3710 case SPEED_10:
3711 adapter->tx_timeout_factor = 14;
3712 break;
3713 case SPEED_100:
3714 /* maybe add some timeout factor ? */
3715 break;
3716 }
3717
3718 netif_carrier_on(netdev);
3719
3720 igb_ping_all_vfs(adapter);
3721 igb_check_vf_rate_limit(adapter);
3722
3723 /* link state has changed, schedule phy info update */
3724 if (!test_bit(__IGB_DOWN, &adapter->state))
3725 mod_timer(&adapter->phy_info_timer,
3726 round_jiffies(jiffies + 2 * HZ));
3727 }
3728 } else {
3729 if (netif_carrier_ok(netdev)) {
3730 adapter->link_speed = 0;
3731 adapter->link_duplex = 0;
3732
3733 /* check for thermal sensor event */
3734 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3735 printk(KERN_ERR "igb: %s The network adapter "
3736 "was stopped because it "
3737 "overheated.\n",
3738 netdev->name);
3739 }
3740
3741 			/* Link status message must follow this format */
3742 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3743 netdev->name);
3744 netif_carrier_off(netdev);
3745
3746 igb_ping_all_vfs(adapter);
3747
3748 /* link state has changed, schedule phy info update */
3749 if (!test_bit(__IGB_DOWN, &adapter->state))
3750 mod_timer(&adapter->phy_info_timer,
3751 round_jiffies(jiffies + 2 * HZ));
3752 }
3753 }
3754
3755 spin_lock(&adapter->stats64_lock);
3756 igb_update_stats(adapter, &adapter->stats64);
3757 spin_unlock(&adapter->stats64_lock);
3758
3759 for (i = 0; i < adapter->num_tx_queues; i++) {
3760 struct igb_ring *tx_ring = adapter->tx_ring[i];
3761 if (!netif_carrier_ok(netdev)) {
3762 /* We've lost link, so the controller stops DMA,
3763 * but we've got queued Tx work that's never going
3764 * to get done, so reset controller to flush Tx.
3765 * (Do the reset outside of interrupt context). */
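			/* igb_desc_unused() reports count - 1 for a fully cleaned
			 * ring, so "unused + 1 < count" is true whenever
			 * descriptors are still outstanding after link loss. */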
3766 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3767 adapter->tx_timeout_count++;
3768 schedule_work(&adapter->reset_task);
3769 /* return immediately since reset is imminent */
3770 return;
3771 }
3772 }
3773
3774 /* Force detection of hung controller every watchdog period */
3775 tx_ring->detect_tx_hung = true;
3776 }
3777
3778 /* Cause software interrupt to ensure rx ring is cleaned */
3779 if (adapter->msix_entries) {
3780 u32 eics = 0;
3781 for (i = 0; i < adapter->num_q_vectors; i++) {
3782 struct igb_q_vector *q_vector = adapter->q_vector[i];
3783 eics |= q_vector->eims_value;
3784 }
3785 wr32(E1000_EICS, eics);
3786 } else {
3787 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3788 }
3789
3790 igb_spoof_check(adapter);
3791
3792 /* Reset the timer */
3793 if (!test_bit(__IGB_DOWN, &adapter->state))
3794 mod_timer(&adapter->watchdog_timer,
3795 round_jiffies(jiffies + 2 * HZ));
3796 }
3797
3798 enum latency_range {
3799 lowest_latency = 0,
3800 low_latency = 1,
3801 bulk_latency = 2,
3802 latency_invalid = 255
3803 };
3804
3805 /**
3806 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3807 *
3808  * Stores a new ITR value based strictly on packet size. This
3809 * algorithm is less sophisticated than that used in igb_update_itr,
3810 * due to the difficulty of synchronizing statistics across multiple
3811 * receive rings. The divisors and thresholds used by this function
3812 * were determined based on theoretical maximum wire speed and testing
3813 * data, in order to minimize response time while increasing bulk
3814 * throughput.
3815 * This functionality is controlled by the InterruptThrottleRate module
3816 * parameter (see igb_param.c)
3817 * NOTE: This function is called only when operating in a multiqueue
3818 * receive environment.
3819 * @q_vector: pointer to q_vector
3820 **/
3821 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3822 {
3823 int new_val = q_vector->itr_val;
3824 int avg_wire_size = 0;
3825 struct igb_adapter *adapter = q_vector->adapter;
3826 struct igb_ring *ring;
3827 unsigned int packets;
3828
3829 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3830 * ints/sec - ITR timer value of 120 ticks.
3831 */
3832 if (adapter->link_speed != SPEED_1000) {
3833 new_val = 976;
3834 goto set_itr_val;
3835 }
3836
3837 ring = q_vector->rx_ring;
3838 if (ring) {
3839 packets = ACCESS_ONCE(ring->total_packets);
3840
3841 if (packets)
3842 avg_wire_size = ring->total_bytes / packets;
3843 }
3844
3845 ring = q_vector->tx_ring;
3846 if (ring) {
3847 packets = ACCESS_ONCE(ring->total_packets);
3848
3849 if (packets)
3850 avg_wire_size = max_t(u32, avg_wire_size,
3851 ring->total_bytes / packets);
3852 }
3853
3854 /* if avg_wire_size isn't set no work was done */
3855 if (!avg_wire_size)
3856 goto clear_counts;
3857
3858 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3859 avg_wire_size += 24;
3860
3861 /* Don't starve jumbo frames */
3862 avg_wire_size = min(avg_wire_size, 3000);
3863
3864 /* Give a little boost to mid-size frames */
3865 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3866 new_val = avg_wire_size / 3;
3867 else
3868 new_val = avg_wire_size / 2;
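	/* new_val is a raw EITR count (roughly 0.25 usec each); e.g. 1500-byte
	 * frames give (1500 + 24) / 2 = 762, an interrupt ceiling of ~5K/sec. */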
3869
3870 /* when in itr mode 3 do not exceed 20K ints/sec */
3871 if (adapter->rx_itr_setting == 3 && new_val < 196)
3872 new_val = 196;
3873
3874 set_itr_val:
3875 if (new_val != q_vector->itr_val) {
3876 q_vector->itr_val = new_val;
3877 q_vector->set_itr = 1;
3878 }
3879 clear_counts:
3880 if (q_vector->rx_ring) {
3881 q_vector->rx_ring->total_bytes = 0;
3882 q_vector->rx_ring->total_packets = 0;
3883 }
3884 if (q_vector->tx_ring) {
3885 q_vector->tx_ring->total_bytes = 0;
3886 q_vector->tx_ring->total_packets = 0;
3887 }
3888 }
3889
3890 /**
3891 * igb_update_itr - update the dynamic ITR value based on statistics
3892 * Stores a new ITR value based on packets and byte
3893 * counts during the last interrupt. The advantage of per interrupt
3894 * computation is faster updates and more accurate ITR for the current
3895 * traffic pattern. Constants in this function were computed
3896 * based on theoretical maximum wire speed and thresholds were set based
3897 * on testing data as well as attempting to minimize response time
3898 * while increasing bulk throughput.
3899 * this functionality is controlled by the InterruptThrottleRate module
3900 * parameter (see igb_param.c)
3901 * NOTE: These calculations are only valid when operating in a single-
3902 * queue environment.
3903 * @adapter: pointer to adapter
3904 * @itr_setting: current q_vector->itr_val
3905 * @packets: the number of packets during this measurement interval
3906 * @bytes: the number of bytes during this measurement interval
3907 **/
3908 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3909 int packets, int bytes)
3910 {
3911 unsigned int retval = itr_setting;
3912
3913 if (packets == 0)
3914 goto update_itr_done;
3915
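	/* The transitions below move between lowest/low/bulk latency based on
	 * bytes-per-packet and packet counts since the last interrupt: large
	 * average frames (TSO, jumbo) push toward bulk_latency, while small,
	 * sparse traffic pulls back toward lowest_latency. */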
3916 switch (itr_setting) {
3917 case lowest_latency:
3918 /* handle TSO and jumbo frames */
3919 if (bytes/packets > 8000)
3920 retval = bulk_latency;
3921 else if ((packets < 5) && (bytes > 512))
3922 retval = low_latency;
3923 break;
3924 case low_latency: /* 50 usec aka 20000 ints/s */
3925 if (bytes > 10000) {
3926 /* this if handles the TSO accounting */
3927 if (bytes/packets > 8000) {
3928 retval = bulk_latency;
3929 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3930 retval = bulk_latency;
3931 			} else if (packets > 35) {
3932 retval = lowest_latency;
3933 }
3934 } else if (bytes/packets > 2000) {
3935 retval = bulk_latency;
3936 } else if (packets <= 2 && bytes < 512) {
3937 retval = lowest_latency;
3938 }
3939 break;
3940 case bulk_latency: /* 250 usec aka 4000 ints/s */
3941 if (bytes > 25000) {
3942 if (packets > 35)
3943 retval = low_latency;
3944 } else if (bytes < 1500) {
3945 retval = low_latency;
3946 }
3947 break;
3948 }
3949
3950 update_itr_done:
3951 return retval;
3952 }
3953
3954 static void igb_set_itr(struct igb_adapter *adapter)
3955 {
3956 struct igb_q_vector *q_vector = adapter->q_vector[0];
3957 u16 current_itr;
3958 u32 new_itr = q_vector->itr_val;
3959
3960 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3961 if (adapter->link_speed != SPEED_1000) {
3962 current_itr = 0;
3963 new_itr = 4000;
3964 goto set_itr_now;
3965 }
3966
3967 adapter->rx_itr = igb_update_itr(adapter,
3968 adapter->rx_itr,
3969 q_vector->rx_ring->total_packets,
3970 q_vector->rx_ring->total_bytes);
3971
3972 adapter->tx_itr = igb_update_itr(adapter,
3973 adapter->tx_itr,
3974 q_vector->tx_ring->total_packets,
3975 q_vector->tx_ring->total_bytes);
3976 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3977
3978 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3979 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3980 current_itr = low_latency;
3981
3982 switch (current_itr) {
3983 /* counts and packets in update_itr are dependent on these numbers */
3984 case lowest_latency:
3985 new_itr = 56; /* aka 70,000 ints/sec */
3986 break;
3987 case low_latency:
3988 new_itr = 196; /* aka 20,000 ints/sec */
3989 break;
3990 case bulk_latency:
3991 new_itr = 980; /* aka 4,000 ints/sec */
3992 break;
3993 default:
3994 break;
3995 }
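	/* These are raw EITR counts: assuming roughly 0.25 usec per count,
	 * 56, 196 and 980 give ~14, ~49 and ~245 usec intervals, matching
	 * the 70K/20K/4K ints/sec noted in the case labels above. */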
3996
3997 set_itr_now:
3998 q_vector->rx_ring->total_bytes = 0;
3999 q_vector->rx_ring->total_packets = 0;
4000 q_vector->tx_ring->total_bytes = 0;
4001 q_vector->tx_ring->total_packets = 0;
4002
4003 if (new_itr != q_vector->itr_val) {
4004 /* this attempts to bias the interrupt rate towards Bulk
4005 * by adding intermediate steps when interrupt rate is
4006 * increasing */
4007 new_itr = new_itr > q_vector->itr_val ?
4008 max((new_itr * q_vector->itr_val) /
4009 (new_itr + (q_vector->itr_val >> 2)),
4010 new_itr) :
4011 new_itr;
4012 /* Don't write the value here; it resets the adapter's
4013 * internal timer, and causes us to delay far longer than
4014 * we should between interrupts. Instead, we write the ITR
4015 * value at the beginning of the next interrupt so the timing
4016 * ends up being correct.
4017 */
4018 q_vector->itr_val = new_itr;
4019 q_vector->set_itr = 1;
4020 }
4021 }
4022
4023 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4024 u32 type_tucmd, u32 mss_l4len_idx)
4025 {
4026 struct e1000_adv_tx_context_desc *context_desc;
4027 u16 i = tx_ring->next_to_use;
4028
4029 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4030
4031 i++;
4032 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4033
4034 /* set bits to identify this as an advanced context descriptor */
4035 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4036
4037 /* For 82575, context index must be unique per ring. */
4038 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4039 mss_l4len_idx |= tx_ring->reg_idx << 4;
4040
4041 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4042 context_desc->seqnum_seed = 0;
4043 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4044 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4045 }
4046
4047 static int igb_tso(struct igb_ring *tx_ring,
4048 struct igb_tx_buffer *first,
4049 u8 *hdr_len)
4050 {
4051 struct sk_buff *skb = first->skb;
4052 u32 vlan_macip_lens, type_tucmd;
4053 u32 mss_l4len_idx, l4len;
4054
4055 if (!skb_is_gso(skb))
4056 return 0;
4057
4058 if (skb_header_cloned(skb)) {
4059 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4060 if (err)
4061 return err;
4062 }
4063
4064 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4065 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4066
4067 if (first->protocol == __constant_htons(ETH_P_IP)) {
4068 struct iphdr *iph = ip_hdr(skb);
4069 iph->tot_len = 0;
4070 iph->check = 0;
4071 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4072 iph->daddr, 0,
4073 IPPROTO_TCP,
4074 0);
4075 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4076 first->tx_flags |= IGB_TX_FLAGS_TSO |
4077 IGB_TX_FLAGS_CSUM |
4078 IGB_TX_FLAGS_IPV4;
4079 } else if (skb_is_gso_v6(skb)) {
4080 ipv6_hdr(skb)->payload_len = 0;
4081 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4082 &ipv6_hdr(skb)->daddr,
4083 0, IPPROTO_TCP, 0);
4084 first->tx_flags |= IGB_TX_FLAGS_TSO |
4085 IGB_TX_FLAGS_CSUM;
4086 }
4087
4088 /* compute header lengths */
4089 l4len = tcp_hdrlen(skb);
4090 *hdr_len = skb_transport_offset(skb) + l4len;
4091
4092 /* update gso size and bytecount with header size */
4093 first->gso_segs = skb_shinfo(skb)->gso_segs;
4094 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4095
4096 /* MSS L4LEN IDX */
4097 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4098 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4099
4100 /* VLAN MACLEN IPLEN */
4101 vlan_macip_lens = skb_network_header_len(skb);
4102 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4103 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
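	/* The two context words just built: vlan_macip_lens packs the IP header
	 * length, the MAC header length and the VLAN tag; mss_l4len_idx packs
	 * the TCP header length and the MSS. igb_tx_ctxtdesc() turns them into
	 * an advanced context descriptor that precedes the data descriptors. */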
4104
4105 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4106
4107 return 1;
4108 }
4109
4110 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4111 {
4112 struct sk_buff *skb = first->skb;
4113 u32 vlan_macip_lens = 0;
4114 u32 mss_l4len_idx = 0;
4115 u32 type_tucmd = 0;
4116
4117 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4118 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4119 return;
4120 } else {
4121 u8 l4_hdr = 0;
4122 switch (first->protocol) {
4123 case __constant_htons(ETH_P_IP):
4124 vlan_macip_lens |= skb_network_header_len(skb);
4125 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4126 l4_hdr = ip_hdr(skb)->protocol;
4127 break;
4128 case __constant_htons(ETH_P_IPV6):
4129 vlan_macip_lens |= skb_network_header_len(skb);
4130 l4_hdr = ipv6_hdr(skb)->nexthdr;
4131 break;
4132 default:
4133 if (unlikely(net_ratelimit())) {
4134 dev_warn(tx_ring->dev,
4135 "partial checksum but proto=%x!\n",
4136 first->protocol);
4137 }
4138 break;
4139 }
4140
4141 switch (l4_hdr) {
4142 case IPPROTO_TCP:
4143 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4144 mss_l4len_idx = tcp_hdrlen(skb) <<
4145 E1000_ADVTXD_L4LEN_SHIFT;
4146 break;
4147 case IPPROTO_SCTP:
4148 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4149 mss_l4len_idx = sizeof(struct sctphdr) <<
4150 E1000_ADVTXD_L4LEN_SHIFT;
4151 break;
4152 case IPPROTO_UDP:
4153 mss_l4len_idx = sizeof(struct udphdr) <<
4154 E1000_ADVTXD_L4LEN_SHIFT;
4155 break;
4156 default:
4157 if (unlikely(net_ratelimit())) {
4158 dev_warn(tx_ring->dev,
4159 "partial checksum but l4 proto=%x!\n",
4160 l4_hdr);
4161 }
4162 break;
4163 }
4164
4165 /* update TX checksum flag */
4166 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4167 }
4168
4169 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4170 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4171
4172 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4173 }
4174
4175 static __le32 igb_tx_cmd_type(u32 tx_flags)
4176 {
4177 /* set type for advanced descriptor with frame checksum insertion */
4178 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4179 E1000_ADVTXD_DCMD_IFCS |
4180 E1000_ADVTXD_DCMD_DEXT);
4181
4182 /* set HW vlan bit if vlan is present */
4183 if (tx_flags & IGB_TX_FLAGS_VLAN)
4184 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4185
4186 /* set timestamp bit if present */
4187 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4188 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4189
4190 /* set segmentation bits for TSO */
4191 if (tx_flags & IGB_TX_FLAGS_TSO)
4192 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4193
4194 return cmd_type;
4195 }
4196
4197 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4198 union e1000_adv_tx_desc *tx_desc,
4199 u32 tx_flags, unsigned int paylen)
4200 {
4201 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4202
4203 /* 82575 requires a unique index per ring if any offload is enabled */
4204 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4205 (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX))
4206 olinfo_status |= tx_ring->reg_idx << 4;
4207
4208 /* insert L4 checksum */
4209 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4210 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4211
4212 /* insert IPv4 checksum */
4213 if (tx_flags & IGB_TX_FLAGS_IPV4)
4214 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4215 }
4216
4217 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4218 }
4219
4220 /*
4221 * The largest size we can write to the descriptor is 65535. In order to
4222 * maintain a power of two alignment we have to limit ourselves to 32K.
4223 */
4224 #define IGB_MAX_TXD_PWR 15
4225 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4226
4227 static void igb_tx_map(struct igb_ring *tx_ring,
4228 struct igb_tx_buffer *first,
4229 const u8 hdr_len)
4230 {
4231 struct sk_buff *skb = first->skb;
4232 struct igb_tx_buffer *tx_buffer_info;
4233 union e1000_adv_tx_desc *tx_desc;
4234 dma_addr_t dma;
4235 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4236 unsigned int data_len = skb->data_len;
4237 unsigned int size = skb_headlen(skb);
4238 unsigned int paylen = skb->len - hdr_len;
4239 __le32 cmd_type;
4240 u32 tx_flags = first->tx_flags;
4241 u16 i = tx_ring->next_to_use;
4242
4243 tx_desc = IGB_TX_DESC(tx_ring, i);
4244
4245 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4246 cmd_type = igb_tx_cmd_type(tx_flags);
4247
4248 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4249 if (dma_mapping_error(tx_ring->dev, dma))
4250 goto dma_error;
4251
4252 /* record length, and DMA address */
4253 first->length = size;
4254 first->dma = dma;
4255 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4256
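	/* The loop below walks skb->data first and then each page fragment;
	 * the inner while splits any buffer larger than IGB_MAX_DATA_PER_TXD
	 * (32KB) across multiple descriptors. */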
4257 for (;;) {
4258 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4259 tx_desc->read.cmd_type_len =
4260 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4261
4262 i++;
4263 tx_desc++;
4264 if (i == tx_ring->count) {
4265 tx_desc = IGB_TX_DESC(tx_ring, 0);
4266 i = 0;
4267 }
4268
4269 dma += IGB_MAX_DATA_PER_TXD;
4270 size -= IGB_MAX_DATA_PER_TXD;
4271
4272 tx_desc->read.olinfo_status = 0;
4273 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4274 }
4275
4276 if (likely(!data_len))
4277 break;
4278
4279 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4280
4281 i++;
4282 tx_desc++;
4283 if (i == tx_ring->count) {
4284 tx_desc = IGB_TX_DESC(tx_ring, 0);
4285 i = 0;
4286 }
4287
4288 size = frag->size;
4289 data_len -= size;
4290
4291 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4292 size, DMA_TO_DEVICE);
4293 if (dma_mapping_error(tx_ring->dev, dma))
4294 goto dma_error;
4295
4296 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4297 tx_buffer_info->length = size;
4298 tx_buffer_info->dma = dma;
4299
4300 tx_desc->read.olinfo_status = 0;
4301 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4302
4303 frag++;
4304 }
4305
4306 /* write last descriptor with RS and EOP bits */
4307 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4308 tx_desc->read.cmd_type_len = cmd_type;
4309
4310 /* set the timestamp */
4311 first->time_stamp = jiffies;
4312
4313 /*
4314 * Force memory writes to complete before letting h/w know there
4315 * are new descriptors to fetch. (Only applicable for weak-ordered
4316 * memory model archs, such as IA-64).
4317 *
4318 * We also need this memory barrier to make certain all of the
4319 * status bits have been updated before next_to_watch is written.
4320 */
4321 wmb();
4322
4323 /* set next_to_watch value indicating a packet is present */
4324 first->next_to_watch = tx_desc;
4325
4326 i++;
4327 if (i == tx_ring->count)
4328 i = 0;
4329
4330 tx_ring->next_to_use = i;
4331
4332 writel(i, tx_ring->tail);
4333
4334 /* we need this if more than one processor can write to our tail
4335 	 * at a time, it synchronizes IO on IA64/Altix systems */
4336 mmiowb();
4337
4338 return;
4339
4340 dma_error:
4341 dev_err(tx_ring->dev, "TX DMA map failed\n");
4342
4343 /* clear dma mappings for failed tx_buffer_info map */
4344 for (;;) {
4345 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4346 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4347 if (tx_buffer_info == first)
4348 break;
4349 if (i == 0)
4350 i = tx_ring->count;
4351 i--;
4352 }
4353
4354 tx_ring->next_to_use = i;
4355 }
4356
4357 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4358 {
4359 struct net_device *netdev = tx_ring->netdev;
4360
4361 netif_stop_subqueue(netdev, tx_ring->queue_index);
4362
4363 /* Herbert's original patch had:
4364 * smp_mb__after_netif_stop_queue();
4365 * but since that doesn't exist yet, just open code it. */
4366 smp_mb();
4367
4368 /* We need to check again in a case another CPU has just
4369 * made room available. */
4370 if (igb_desc_unused(tx_ring) < size)
4371 return -EBUSY;
4372
4373 /* A reprieve! */
4374 netif_wake_subqueue(netdev, tx_ring->queue_index);
4375
4376 u64_stats_update_begin(&tx_ring->tx_syncp2);
4377 tx_ring->tx_stats.restart_queue2++;
4378 u64_stats_update_end(&tx_ring->tx_syncp2);
4379
4380 return 0;
4381 }
4382
4383 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4384 {
4385 if (igb_desc_unused(tx_ring) >= size)
4386 return 0;
4387 return __igb_maybe_stop_tx(tx_ring, size);
4388 }
4389
4390 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4391 struct igb_ring *tx_ring)
4392 {
4393 struct igb_tx_buffer *first;
4394 int tso;
4395 u32 tx_flags = 0;
4396 __be16 protocol = vlan_get_protocol(skb);
4397 u8 hdr_len = 0;
4398
4399 /* need: 1 descriptor per page,
4400 * + 2 desc gap to keep tail from touching head,
4401 * + 1 desc for skb->data,
4402 * + 1 desc for context descriptor,
4403 * otherwise try next time */
4404 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4405 /* this is a hard error */
4406 return NETDEV_TX_BUSY;
4407 }
4408
4409 /* record the location of the first descriptor for this packet */
4410 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4411 first->skb = skb;
4412 first->bytecount = skb->len;
4413 first->gso_segs = 1;
4414
4415 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4416 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4417 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4418 }
4419
4420 if (vlan_tx_tag_present(skb)) {
4421 tx_flags |= IGB_TX_FLAGS_VLAN;
4422 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4423 }
4424
4425 /* record initial flags and protocol */
4426 first->tx_flags = tx_flags;
4427 first->protocol = protocol;
4428
4429 tso = igb_tso(tx_ring, first, &hdr_len);
4430 if (tso < 0)
4431 goto out_drop;
4432 else if (!tso)
4433 igb_tx_csum(tx_ring, first);
4434
4435 igb_tx_map(tx_ring, first, hdr_len);
4436
4437 /* Make sure there is space in the ring for the next send. */
4438 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4439
4440 return NETDEV_TX_OK;
4441
4442 out_drop:
4443 igb_unmap_and_free_tx_resource(tx_ring, first);
4444
4445 return NETDEV_TX_OK;
4446 }
4447
4448 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4449 struct sk_buff *skb)
4450 {
4451 unsigned int r_idx = skb->queue_mapping;
4452
4453 if (r_idx >= adapter->num_tx_queues)
4454 r_idx = r_idx % adapter->num_tx_queues;
4455
4456 return adapter->tx_ring[r_idx];
4457 }
4458
4459 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4460 struct net_device *netdev)
4461 {
4462 struct igb_adapter *adapter = netdev_priv(netdev);
4463
4464 if (test_bit(__IGB_DOWN, &adapter->state)) {
4465 dev_kfree_skb_any(skb);
4466 return NETDEV_TX_OK;
4467 }
4468
4469 if (skb->len <= 0) {
4470 dev_kfree_skb_any(skb);
4471 return NETDEV_TX_OK;
4472 }
4473
4474 /*
4475 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4476 * in order to meet this minimum size requirement.
4477 */
4478 if (skb->len < 17) {
4479 if (skb_padto(skb, 17))
4480 return NETDEV_TX_OK;
4481 skb->len = 17;
4482 }
4483
4484 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4485 }
4486
4487 /**
4488 * igb_tx_timeout - Respond to a Tx Hang
4489 * @netdev: network interface device structure
4490 **/
4491 static void igb_tx_timeout(struct net_device *netdev)
4492 {
4493 struct igb_adapter *adapter = netdev_priv(netdev);
4494 struct e1000_hw *hw = &adapter->hw;
4495
4496 /* Do the reset outside of interrupt context */
4497 adapter->tx_timeout_count++;
4498
4499 if (hw->mac.type == e1000_82580)
4500 hw->dev_spec._82575.global_device_reset = true;
4501
4502 schedule_work(&adapter->reset_task);
4503 wr32(E1000_EICS,
4504 (adapter->eims_enable_mask & ~adapter->eims_other));
4505 }
4506
4507 static void igb_reset_task(struct work_struct *work)
4508 {
4509 struct igb_adapter *adapter;
4510 adapter = container_of(work, struct igb_adapter, reset_task);
4511
4512 igb_dump(adapter);
4513 netdev_err(adapter->netdev, "Reset adapter\n");
4514 igb_reinit_locked(adapter);
4515 }
4516
4517 /**
4518 * igb_get_stats64 - Get System Network Statistics
4519 * @netdev: network interface device structure
4520 * @stats: rtnl_link_stats64 pointer
4521 *
4522 **/
4523 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4524 struct rtnl_link_stats64 *stats)
4525 {
4526 struct igb_adapter *adapter = netdev_priv(netdev);
4527
4528 spin_lock(&adapter->stats64_lock);
4529 igb_update_stats(adapter, &adapter->stats64);
4530 memcpy(stats, &adapter->stats64, sizeof(*stats));
4531 spin_unlock(&adapter->stats64_lock);
4532
4533 return stats;
4534 }
4535
4536 /**
4537 * igb_change_mtu - Change the Maximum Transfer Unit
4538 * @netdev: network interface device structure
4539 * @new_mtu: new value for maximum frame size
4540 *
4541 * Returns 0 on success, negative on failure
4542 **/
4543 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4544 {
4545 struct igb_adapter *adapter = netdev_priv(netdev);
4546 struct pci_dev *pdev = adapter->pdev;
4547 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
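	/* 22 bytes of overhead: 14-byte Ethernet header, 4-byte FCS and a
	 * 4-byte VLAN tag, so an MTU of 9216 gives the 9238-byte standard
	 * jumbo frame limit checked below. */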
4548
4549 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4550 dev_err(&pdev->dev, "Invalid MTU setting\n");
4551 return -EINVAL;
4552 }
4553
4554 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4555 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4556 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4557 return -EINVAL;
4558 }
4559
4560 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4561 msleep(1);
4562
4563 /* igb_down has a dependency on max_frame_size */
4564 adapter->max_frame_size = max_frame;
4565
4566 if (netif_running(netdev))
4567 igb_down(adapter);
4568
4569 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4570 netdev->mtu, new_mtu);
4571 netdev->mtu = new_mtu;
4572
4573 if (netif_running(netdev))
4574 igb_up(adapter);
4575 else
4576 igb_reset(adapter);
4577
4578 clear_bit(__IGB_RESETTING, &adapter->state);
4579
4580 return 0;
4581 }
4582
4583 /**
4584 * igb_update_stats - Update the board statistics counters
4585 * @adapter: board private structure
4586 **/
4587
4588 void igb_update_stats(struct igb_adapter *adapter,
4589 struct rtnl_link_stats64 *net_stats)
4590 {
4591 struct e1000_hw *hw = &adapter->hw;
4592 struct pci_dev *pdev = adapter->pdev;
4593 u32 reg, mpc;
4594 u16 phy_tmp;
4595 int i;
4596 u64 bytes, packets;
4597 unsigned int start;
4598 u64 _bytes, _packets;
4599
4600 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4601
4602 /*
4603 * Prevent stats update while adapter is being reset, or if the pci
4604 * connection is down.
4605 */
4606 if (adapter->link_speed == 0)
4607 return;
4608 if (pci_channel_offline(pdev))
4609 return;
4610
4611 bytes = 0;
4612 packets = 0;
4613 for (i = 0; i < adapter->num_rx_queues; i++) {
4614 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4615 struct igb_ring *ring = adapter->rx_ring[i];
4616
4617 ring->rx_stats.drops += rqdpc_tmp;
4618 net_stats->rx_fifo_errors += rqdpc_tmp;
4619
4620 do {
4621 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4622 _bytes = ring->rx_stats.bytes;
4623 _packets = ring->rx_stats.packets;
4624 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
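		/* The fetch_begin/fetch_retry pair re-reads the counters if a
		 * writer updated them mid-read, giving a consistent 64-bit
		 * snapshot even on 32-bit hosts. */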
4625 bytes += _bytes;
4626 packets += _packets;
4627 }
4628
4629 net_stats->rx_bytes = bytes;
4630 net_stats->rx_packets = packets;
4631
4632 bytes = 0;
4633 packets = 0;
4634 for (i = 0; i < adapter->num_tx_queues; i++) {
4635 struct igb_ring *ring = adapter->tx_ring[i];
4636 do {
4637 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4638 _bytes = ring->tx_stats.bytes;
4639 _packets = ring->tx_stats.packets;
4640 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4641 bytes += _bytes;
4642 packets += _packets;
4643 }
4644 net_stats->tx_bytes = bytes;
4645 net_stats->tx_packets = packets;
4646
4647 /* read stats registers */
4648 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4649 adapter->stats.gprc += rd32(E1000_GPRC);
4650 adapter->stats.gorc += rd32(E1000_GORCL);
4651 rd32(E1000_GORCH); /* clear GORCL */
4652 adapter->stats.bprc += rd32(E1000_BPRC);
4653 adapter->stats.mprc += rd32(E1000_MPRC);
4654 adapter->stats.roc += rd32(E1000_ROC);
4655
4656 adapter->stats.prc64 += rd32(E1000_PRC64);
4657 adapter->stats.prc127 += rd32(E1000_PRC127);
4658 adapter->stats.prc255 += rd32(E1000_PRC255);
4659 adapter->stats.prc511 += rd32(E1000_PRC511);
4660 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4661 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4662 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4663 adapter->stats.sec += rd32(E1000_SEC);
4664
4665 mpc = rd32(E1000_MPC);
4666 adapter->stats.mpc += mpc;
4667 net_stats->rx_fifo_errors += mpc;
4668 adapter->stats.scc += rd32(E1000_SCC);
4669 adapter->stats.ecol += rd32(E1000_ECOL);
4670 adapter->stats.mcc += rd32(E1000_MCC);
4671 adapter->stats.latecol += rd32(E1000_LATECOL);
4672 adapter->stats.dc += rd32(E1000_DC);
4673 adapter->stats.rlec += rd32(E1000_RLEC);
4674 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4675 adapter->stats.xontxc += rd32(E1000_XONTXC);
4676 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4677 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4678 adapter->stats.fcruc += rd32(E1000_FCRUC);
4679 adapter->stats.gptc += rd32(E1000_GPTC);
4680 adapter->stats.gotc += rd32(E1000_GOTCL);
4681 rd32(E1000_GOTCH); /* clear GOTCL */
4682 adapter->stats.rnbc += rd32(E1000_RNBC);
4683 adapter->stats.ruc += rd32(E1000_RUC);
4684 adapter->stats.rfc += rd32(E1000_RFC);
4685 adapter->stats.rjc += rd32(E1000_RJC);
4686 adapter->stats.tor += rd32(E1000_TORH);
4687 adapter->stats.tot += rd32(E1000_TOTH);
4688 adapter->stats.tpr += rd32(E1000_TPR);
4689
4690 adapter->stats.ptc64 += rd32(E1000_PTC64);
4691 adapter->stats.ptc127 += rd32(E1000_PTC127);
4692 adapter->stats.ptc255 += rd32(E1000_PTC255);
4693 adapter->stats.ptc511 += rd32(E1000_PTC511);
4694 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4695 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4696
4697 adapter->stats.mptc += rd32(E1000_MPTC);
4698 adapter->stats.bptc += rd32(E1000_BPTC);
4699
4700 adapter->stats.tpt += rd32(E1000_TPT);
4701 adapter->stats.colc += rd32(E1000_COLC);
4702
4703 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4704 /* read internal phy specific stats */
4705 reg = rd32(E1000_CTRL_EXT);
4706 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4707 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4708 adapter->stats.tncrs += rd32(E1000_TNCRS);
4709 }
4710
4711 adapter->stats.tsctc += rd32(E1000_TSCTC);
4712 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4713
4714 adapter->stats.iac += rd32(E1000_IAC);
4715 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4716 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4717 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4718 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4719 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4720 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4721 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4722 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4723
4724 /* Fill out the OS statistics structure */
4725 net_stats->multicast = adapter->stats.mprc;
4726 net_stats->collisions = adapter->stats.colc;
4727
4728 /* Rx Errors */
4729
4730 /* RLEC on some newer hardware can be incorrect so build
4731 * our own version based on RUC and ROC */
4732 net_stats->rx_errors = adapter->stats.rxerrc +
4733 adapter->stats.crcerrs + adapter->stats.algnerrc +
4734 adapter->stats.ruc + adapter->stats.roc +
4735 adapter->stats.cexterr;
4736 net_stats->rx_length_errors = adapter->stats.ruc +
4737 adapter->stats.roc;
4738 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4739 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4740 net_stats->rx_missed_errors = adapter->stats.mpc;
4741
4742 /* Tx Errors */
4743 net_stats->tx_errors = adapter->stats.ecol +
4744 adapter->stats.latecol;
4745 net_stats->tx_aborted_errors = adapter->stats.ecol;
4746 net_stats->tx_window_errors = adapter->stats.latecol;
4747 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4748
4749 /* Tx Dropped needs to be maintained elsewhere */
4750
4751 /* Phy Stats */
4752 if (hw->phy.media_type == e1000_media_type_copper) {
4753 if ((adapter->link_speed == SPEED_1000) &&
4754 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4755 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4756 adapter->phy_stats.idle_errors += phy_tmp;
4757 }
4758 }
4759
4760 /* Management Stats */
4761 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4762 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4763 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4764
4765 /* OS2BMC Stats */
4766 reg = rd32(E1000_MANC);
4767 if (reg & E1000_MANC_EN_BMC2OS) {
4768 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4769 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4770 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4771 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4772 }
4773 }
4774
4775 static irqreturn_t igb_msix_other(int irq, void *data)
4776 {
4777 struct igb_adapter *adapter = data;
4778 struct e1000_hw *hw = &adapter->hw;
4779 u32 icr = rd32(E1000_ICR);
4780 /* reading ICR causes bit 31 of EICR to be cleared */
4781
4782 if (icr & E1000_ICR_DRSTA)
4783 schedule_work(&adapter->reset_task);
4784
4785 if (icr & E1000_ICR_DOUTSYNC) {
4786 /* HW is reporting DMA is out of sync */
4787 adapter->stats.doosync++;
4788 /* The DMA Out of Sync is also indication of a spoof event
4789 * in IOV mode. Check the Wrong VM Behavior register to
4790 * see if it is really a spoof event. */
4791 igb_check_wvbr(adapter);
4792 }
4793
4794 /* Check for a mailbox event */
4795 if (icr & E1000_ICR_VMMB)
4796 igb_msg_task(adapter);
4797
4798 if (icr & E1000_ICR_LSC) {
4799 hw->mac.get_link_status = 1;
4800 /* guard against interrupt when we're going down */
4801 if (!test_bit(__IGB_DOWN, &adapter->state))
4802 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4803 }
4804
4805 if (adapter->vfs_allocated_count)
4806 wr32(E1000_IMS, E1000_IMS_LSC |
4807 E1000_IMS_VMMB |
4808 E1000_IMS_DOUTSYNC);
4809 else
4810 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4811 wr32(E1000_EIMS, adapter->eims_other);
4812
4813 return IRQ_HANDLED;
4814 }
4815
4816 static void igb_write_itr(struct igb_q_vector *q_vector)
4817 {
4818 struct igb_adapter *adapter = q_vector->adapter;
4819 u32 itr_val = q_vector->itr_val & 0x7FFC;
4820
4821 if (!q_vector->set_itr)
4822 return;
4823
4824 if (!itr_val)
4825 itr_val = 0x4;
4826
4827 if (adapter->hw.mac.type == e1000_82575)
4828 itr_val |= itr_val << 16;
4829 else
4830 itr_val |= 0x8000000;
4831
4832 writel(itr_val, q_vector->itr_register);
4833 q_vector->set_itr = 0;
4834 }
4835
4836 static irqreturn_t igb_msix_ring(int irq, void *data)
4837 {
4838 struct igb_q_vector *q_vector = data;
4839
4840 /* Write the ITR value calculated from the previous interrupt. */
4841 igb_write_itr(q_vector);
4842
4843 napi_schedule(&q_vector->napi);
4844
4845 return IRQ_HANDLED;
4846 }
4847
4848 #ifdef CONFIG_IGB_DCA
4849 static void igb_update_dca(struct igb_q_vector *q_vector)
4850 {
4851 struct igb_adapter *adapter = q_vector->adapter;
4852 struct e1000_hw *hw = &adapter->hw;
4853 int cpu = get_cpu();
4854
4855 if (q_vector->cpu == cpu)
4856 goto out_no_update;
4857
4858 if (q_vector->tx_ring) {
4859 int q = q_vector->tx_ring->reg_idx;
4860 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4861 if (hw->mac.type == e1000_82575) {
4862 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4863 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4864 } else {
4865 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4866 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4867 E1000_DCA_TXCTRL_CPUID_SHIFT;
4868 }
4869 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4870 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4871 }
4872 if (q_vector->rx_ring) {
4873 int q = q_vector->rx_ring->reg_idx;
4874 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4875 if (hw->mac.type == e1000_82575) {
4876 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4877 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4878 } else {
4879 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4880 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4881 E1000_DCA_RXCTRL_CPUID_SHIFT;
4882 }
4883 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4884 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4885 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4886 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4887 }
4888 q_vector->cpu = cpu;
4889 out_no_update:
4890 put_cpu();
4891 }
4892
4893 static void igb_setup_dca(struct igb_adapter *adapter)
4894 {
4895 struct e1000_hw *hw = &adapter->hw;
4896 int i;
4897
4898 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4899 return;
4900
4901 /* Always use CB2 mode, difference is masked in the CB driver. */
4902 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4903
4904 for (i = 0; i < adapter->num_q_vectors; i++) {
4905 adapter->q_vector[i]->cpu = -1;
4906 igb_update_dca(adapter->q_vector[i]);
4907 }
4908 }
4909
4910 static int __igb_notify_dca(struct device *dev, void *data)
4911 {
4912 struct net_device *netdev = dev_get_drvdata(dev);
4913 struct igb_adapter *adapter = netdev_priv(netdev);
4914 struct pci_dev *pdev = adapter->pdev;
4915 struct e1000_hw *hw = &adapter->hw;
4916 unsigned long event = *(unsigned long *)data;
4917
4918 switch (event) {
4919 case DCA_PROVIDER_ADD:
4920 /* if already enabled, don't do it again */
4921 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4922 break;
4923 if (dca_add_requester(dev) == 0) {
4924 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4925 dev_info(&pdev->dev, "DCA enabled\n");
4926 igb_setup_dca(adapter);
4927 break;
4928 }
4929 /* Fall Through since DCA is disabled. */
4930 case DCA_PROVIDER_REMOVE:
4931 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4932 /* without this a class_device is left
4933 * hanging around in the sysfs model */
4934 dca_remove_requester(dev);
4935 dev_info(&pdev->dev, "DCA disabled\n");
4936 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4937 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4938 }
4939 break;
4940 }
4941
4942 return 0;
4943 }
4944
4945 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4946 void *p)
4947 {
4948 int ret_val;
4949
4950 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4951 __igb_notify_dca);
4952
4953 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4954 }
4955 #endif /* CONFIG_IGB_DCA */
4956
4957 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4958 {
4959 struct e1000_hw *hw = &adapter->hw;
4960 u32 ping;
4961 int i;
4962
4963 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4964 ping = E1000_PF_CONTROL_MSG;
4965 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4966 ping |= E1000_VT_MSGTYPE_CTS;
4967 igb_write_mbx(hw, &ping, 1, i);
4968 }
4969 }
4970
4971 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4972 {
4973 struct e1000_hw *hw = &adapter->hw;
4974 u32 vmolr = rd32(E1000_VMOLR(vf));
4975 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4976
4977 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4978 IGB_VF_FLAG_MULTI_PROMISC);
4979 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4980
4981 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4982 vmolr |= E1000_VMOLR_MPME;
4983 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4984 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4985 } else {
4986 /*
4987 * if we have hashes and we are clearing a multicast promisc
4988 * flag we need to write the hashes to the MTA as this step
4989 * was previously skipped
4990 */
4991 if (vf_data->num_vf_mc_hashes > 30) {
4992 vmolr |= E1000_VMOLR_MPME;
4993 } else if (vf_data->num_vf_mc_hashes) {
4994 int j;
4995 vmolr |= E1000_VMOLR_ROMPE;
4996 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4997 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4998 }
4999 }
5000
5001 wr32(E1000_VMOLR(vf), vmolr);
5002
5003 /* there are flags left unprocessed, likely not supported */
5004 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5005 return -EINVAL;
5006
5007 return 0;
5008
5009 }
5010
5011 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5012 u32 *msgbuf, u32 vf)
5013 {
5014 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5015 u16 *hash_list = (u16 *)&msgbuf[1];
5016 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5017 int i;
5018
5019 /* salt away the number of multicast addresses assigned
5020 	 * to this VF for later use to restore when the PF multicast
5021 * list changes
5022 */
5023 vf_data->num_vf_mc_hashes = n;
5024
5025 /* only up to 30 hash values supported */
5026 if (n > 30)
5027 n = 30;
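	/* The 30-entry limit appears to come from the mailbox size: a 16-dword
	 * message with word 0 holding the command leaves 15 dwords, i.e. at
	 * most 30 u16 hash values. */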
5028
5029 /* store the hashes for later use */
5030 for (i = 0; i < n; i++)
5031 vf_data->vf_mc_hashes[i] = hash_list[i];
5032
5033 /* Flush and reset the mta with the new values */
5034 igb_set_rx_mode(adapter->netdev);
5035
5036 return 0;
5037 }
5038
5039 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5040 {
5041 struct e1000_hw *hw = &adapter->hw;
5042 struct vf_data_storage *vf_data;
5043 int i, j;
5044
5045 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5046 u32 vmolr = rd32(E1000_VMOLR(i));
5047 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5048
5049 vf_data = &adapter->vf_data[i];
5050
5051 if ((vf_data->num_vf_mc_hashes > 30) ||
5052 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5053 vmolr |= E1000_VMOLR_MPME;
5054 } else if (vf_data->num_vf_mc_hashes) {
5055 vmolr |= E1000_VMOLR_ROMPE;
5056 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5057 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5058 }
5059 wr32(E1000_VMOLR(i), vmolr);
5060 }
5061 }
5062
5063 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5064 {
5065 struct e1000_hw *hw = &adapter->hw;
5066 u32 pool_mask, reg, vid;
5067 int i;
5068
5069 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5070
5071 /* Find the vlan filter for this id */
5072 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5073 reg = rd32(E1000_VLVF(i));
5074
5075 /* remove the vf from the pool */
5076 reg &= ~pool_mask;
5077
5078 /* if pool is empty then remove entry from vfta */
5079 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5080 (reg & E1000_VLVF_VLANID_ENABLE)) {
5081 			vid = reg & E1000_VLVF_VLANID_MASK;
5082 			igb_vfta_set(hw, vid, false);
5083 			reg = 0;
5084 }
5085
5086 wr32(E1000_VLVF(i), reg);
5087 }
5088
5089 adapter->vf_data[vf].vlans_enabled = 0;
5090 }
5091
5092 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5093 {
5094 struct e1000_hw *hw = &adapter->hw;
5095 u32 reg, i;
5096
5097 /* The vlvf table only exists on 82576 hardware and newer */
5098 if (hw->mac.type < e1000_82576)
5099 return -1;
5100
5101 /* we only need to do this if VMDq is enabled */
5102 if (!adapter->vfs_allocated_count)
5103 return -1;
5104
5105 /* Find the vlan filter for this id */
5106 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5107 reg = rd32(E1000_VLVF(i));
5108 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5109 vid == (reg & E1000_VLVF_VLANID_MASK))
5110 break;
5111 }
5112
5113 if (add) {
5114 if (i == E1000_VLVF_ARRAY_SIZE) {
5115 /* Did not find a matching VLAN ID entry that was
5116 * enabled. Search for a free filter entry, i.e.
5117 * one without the enable bit set
5118 */
5119 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5120 reg = rd32(E1000_VLVF(i));
5121 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5122 break;
5123 }
5124 }
5125 if (i < E1000_VLVF_ARRAY_SIZE) {
5126 /* Found an enabled/available entry */
5127 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5128
5129 /* if !enabled we need to set this up in vfta */
5130 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5131 /* add VID to filter table */
5132 igb_vfta_set(hw, vid, true);
5133 reg |= E1000_VLVF_VLANID_ENABLE;
5134 }
5135 reg &= ~E1000_VLVF_VLANID_MASK;
5136 reg |= vid;
5137 wr32(E1000_VLVF(i), reg);
5138
5139 /* do not modify RLPML for PF devices */
5140 if (vf >= adapter->vfs_allocated_count)
5141 return 0;
5142
5143 if (!adapter->vf_data[vf].vlans_enabled) {
5144 u32 size;
5145 reg = rd32(E1000_VMOLR(vf));
5146 size = reg & E1000_VMOLR_RLPML_MASK;
5147 size += 4;
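				/* grow the per-VF long packet maximum (RLPML) by
				 * 4 bytes to make room for the VLAN tag now that
				 * this VF has a VLAN enabled */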
5148 reg &= ~E1000_VMOLR_RLPML_MASK;
5149 reg |= size;
5150 wr32(E1000_VMOLR(vf), reg);
5151 }
5152
5153 adapter->vf_data[vf].vlans_enabled++;
5154 return 0;
5155 }
5156 } else {
5157 if (i < E1000_VLVF_ARRAY_SIZE) {
5158 /* remove vf from the pool */
5159 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5160 /* if pool is empty then remove entry from vfta */
5161 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5162 reg = 0;
5163 igb_vfta_set(hw, vid, false);
5164 }
5165 wr32(E1000_VLVF(i), reg);
5166
5167 /* do not modify RLPML for PF devices */
5168 if (vf >= adapter->vfs_allocated_count)
5169 return 0;
5170
5171 adapter->vf_data[vf].vlans_enabled--;
5172 if (!adapter->vf_data[vf].vlans_enabled) {
5173 u32 size;
5174 reg = rd32(E1000_VMOLR(vf));
5175 size = reg & E1000_VMOLR_RLPML_MASK;
5176 size -= 4;
5177 reg &= ~E1000_VMOLR_RLPML_MASK;
5178 reg |= size;
5179 wr32(E1000_VMOLR(vf), reg);
5180 }
5181 }
5182 }
5183 return 0;
5184 }
5185
5186 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5187 {
5188 struct e1000_hw *hw = &adapter->hw;
5189
5190 if (vid)
5191 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5192 else
5193 wr32(E1000_VMVIR(vf), 0);
5194 }
5195
5196 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5197 int vf, u16 vlan, u8 qos)
5198 {
5199 int err = 0;
5200 struct igb_adapter *adapter = netdev_priv(netdev);
5201
5202 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5203 return -EINVAL;
5204 if (vlan || qos) {
5205 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5206 if (err)
5207 goto out;
5208 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5209 igb_set_vmolr(adapter, vf, !vlan);
5210 adapter->vf_data[vf].pf_vlan = vlan;
5211 adapter->vf_data[vf].pf_qos = qos;
5212 dev_info(&adapter->pdev->dev,
5213 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5214 if (test_bit(__IGB_DOWN, &adapter->state)) {
5215 dev_warn(&adapter->pdev->dev,
5216 "The VF VLAN has been set,"
5217 " but the PF device is not up.\n");
5218 dev_warn(&adapter->pdev->dev,
5219 "Bring the PF device up before"
5220 " attempting to use the VF device.\n");
5221 }
5222 } else {
5223 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5224 false, vf);
5225 igb_set_vmvir(adapter, vlan, vf);
5226 igb_set_vmolr(adapter, vf, true);
5227 adapter->vf_data[vf].pf_vlan = 0;
5228 adapter->vf_data[vf].pf_qos = 0;
5229 }
5230 out:
5231 return err;
5232 }
5233
5234 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5235 {
5236 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5237 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5238
5239 return igb_vlvf_set(adapter, vid, add, vf);
5240 }
5241
5242 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5243 {
5244 /* clear flags - except flag that indicates PF has set the MAC */
5245 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5246 adapter->vf_data[vf].last_nack = jiffies;
5247
5248 /* reset offloads to defaults */
5249 igb_set_vmolr(adapter, vf, true);
5250
5251 /* reset vlans for device */
5252 igb_clear_vf_vfta(adapter, vf);
5253 if (adapter->vf_data[vf].pf_vlan)
5254 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5255 adapter->vf_data[vf].pf_vlan,
5256 adapter->vf_data[vf].pf_qos);
5257 else
5258 igb_clear_vf_vfta(adapter, vf);
5259
5260 /* reset multicast table array for vf */
5261 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5262
5263 /* Flush and reset the mta with the new values */
5264 igb_set_rx_mode(adapter->netdev);
5265 }
5266
5267 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5268 {
5269 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5270
5271 /* generate a new mac address as we were hotplug removed/added */
5272 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5273 random_ether_addr(vf_mac);
5274
5275 /* process remaining reset events */
5276 igb_vf_reset(adapter, vf);
5277 }
5278
5279 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5280 {
5281 struct e1000_hw *hw = &adapter->hw;
5282 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5283 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
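	/* VF MAC addresses are placed at the top of the RAR table and grow
	 * downward (VF 0 gets the last entry), keeping entry 0 for the PF. */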
5284 u32 reg, msgbuf[3];
5285 u8 *addr = (u8 *)(&msgbuf[1]);
5286
5287 /* process all the same items cleared in a function level reset */
5288 igb_vf_reset(adapter, vf);
5289
5290 /* set vf mac address */
5291 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5292
5293 /* enable transmit and receive for vf */
5294 reg = rd32(E1000_VFTE);
5295 wr32(E1000_VFTE, reg | (1 << vf));
5296 reg = rd32(E1000_VFRE);
5297 wr32(E1000_VFRE, reg | (1 << vf));
5298
5299 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5300
5301 /* reply to reset with ack and vf mac address */
5302 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5303 memcpy(addr, vf_mac, 6);
5304 igb_write_mbx(hw, msgbuf, 3, vf);
5305 }
5306
5307 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5308 {
5309 /*
5310 * The VF MAC Address is stored in a packed array of bytes
5311 * starting at the second 32 bit word of the msg array
5312 */
5313 	unsigned char *addr = (unsigned char *)&msg[1];
5314 int err = -1;
5315
5316 if (is_valid_ether_addr(addr))
5317 err = igb_set_vf_mac(adapter, vf, addr);
5318
5319 return err;
5320 }
5321
5322 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5323 {
5324 struct e1000_hw *hw = &adapter->hw;
5325 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5326 u32 msg = E1000_VT_MSGTYPE_NACK;
5327
5328 /* if device isn't clear to send it shouldn't be reading either */
5329 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5330 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5331 igb_write_mbx(hw, &msg, 1, vf);
5332 vf_data->last_nack = jiffies;
5333 }
5334 }
5335
5336 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5337 {
5338 struct pci_dev *pdev = adapter->pdev;
5339 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5340 struct e1000_hw *hw = &adapter->hw;
5341 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5342 s32 retval;
5343
5344 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5345
5346 if (retval) {
5347 /* if receive failed revoke VF CTS stats and restart init */
5348 dev_err(&pdev->dev, "Error receiving message from VF\n");
5349 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5350 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5351 return;
5352 goto out;
5353 }
5354
5355 /* this is a message we already processed, do nothing */
5356 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5357 return;
5358
5359 /*
5360 * until the vf completes a reset it should not be
5361 * allowed to start any configuration.
5362 */
5363
5364 if (msgbuf[0] == E1000_VF_RESET) {
5365 igb_vf_reset_msg(adapter, vf);
5366 return;
5367 }
5368
5369 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5370 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5371 return;
5372 retval = -1;
5373 goto out;
5374 }
5375
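	/* The low 16 bits of msgbuf[0] select the request; the upper
	 * E1000_VT_MSGINFO bits carry request-specific data such as the
	 * multicast hash count used by igb_set_vf_multicasts(). */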
5376 switch ((msgbuf[0] & 0xFFFF)) {
5377 case E1000_VF_SET_MAC_ADDR:
5378 retval = -EINVAL;
5379 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5380 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5381 else
5382 dev_warn(&pdev->dev,
5383 "VF %d attempted to override administratively "
5384 "set MAC address\nReload the VF driver to "
5385 "resume operations\n", vf);
5386 break;
5387 case E1000_VF_SET_PROMISC:
5388 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5389 break;
5390 case E1000_VF_SET_MULTICAST:
5391 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5392 break;
5393 case E1000_VF_SET_LPE:
5394 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5395 break;
5396 case E1000_VF_SET_VLAN:
5397 retval = -1;
5398 if (vf_data->pf_vlan)
5399 dev_warn(&pdev->dev,
5400 "VF %d attempted to override administratively "
5401 "set VLAN tag\nReload the VF driver to "
5402 "resume operations\n", vf);
5403 else
5404 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5405 break;
5406 default:
5407 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5408 retval = -1;
5409 break;
5410 }
5411
5412 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5413 out:
5414 /* notify the VF of the results of what it sent us */
5415 if (retval)
5416 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5417 else
5418 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5419
5420 igb_write_mbx(hw, msgbuf, 1, vf);
5421 }
5422
5423 static void igb_msg_task(struct igb_adapter *adapter)
5424 {
5425 struct e1000_hw *hw = &adapter->hw;
5426 u32 vf;
5427
5428 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5429 /* process any reset requests */
5430 if (!igb_check_for_rst(hw, vf))
5431 igb_vf_reset_event(adapter, vf);
5432
5433 /* process any messages pending */
5434 if (!igb_check_for_msg(hw, vf))
5435 igb_rcv_msg_from_vf(adapter, vf);
5436
5437 /* process any acks */
5438 if (!igb_check_for_ack(hw, vf))
5439 igb_rcv_ack_from_vf(adapter, vf);
5440 }
5441 }
5442
5443 /**
5444 * igb_set_uta - Set unicast filter table address
5445 * @adapter: board private structure
5446 *
5447 * The unicast table address is a register array of 32-bit registers.
5448 * The table is meant to be used in a way similar to how the MTA is used
5449 * however due to certain limitations in the hardware it is necessary to
5450 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5451 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5452 **/
5453 static void igb_set_uta(struct igb_adapter *adapter)
5454 {
5455 struct e1000_hw *hw = &adapter->hw;
5456 int i;
5457
5458 /* The UTA table only exists on 82576 hardware and newer */
5459 if (hw->mac.type < e1000_82576)
5460 return;
5461
5462 /* we only need to do this if VMDq is enabled */
5463 if (!adapter->vfs_allocated_count)
5464 return;
5465
5466 for (i = 0; i < hw->mac.uta_reg_count; i++)
5467 array_wr32(E1000_UTA, i, ~0);
5468 }
5469
5470 /**
5471 * igb_intr_msi - Interrupt Handler
5472 * @irq: interrupt number
5473 * @data: pointer to a network interface device structure
5474 **/
5475 static irqreturn_t igb_intr_msi(int irq, void *data)
5476 {
5477 struct igb_adapter *adapter = data;
5478 struct igb_q_vector *q_vector = adapter->q_vector[0];
5479 struct e1000_hw *hw = &adapter->hw;
5480 /* read ICR disables interrupts using IAM */
5481 u32 icr = rd32(E1000_ICR);
5482
5483 igb_write_itr(q_vector);
5484
5485 if (icr & E1000_ICR_DRSTA)
5486 schedule_work(&adapter->reset_task);
5487
5488 if (icr & E1000_ICR_DOUTSYNC) {
5489 /* HW is reporting DMA is out of sync */
5490 adapter->stats.doosync++;
5491 }
5492
5493 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5494 hw->mac.get_link_status = 1;
5495 if (!test_bit(__IGB_DOWN, &adapter->state))
5496 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5497 }
5498
5499 napi_schedule(&q_vector->napi);
5500
5501 return IRQ_HANDLED;
5502 }
5503
5504 /**
5505 * igb_intr - Legacy Interrupt Handler
5506 * @irq: interrupt number
5507 * @data: pointer to a network interface device structure
5508 **/
5509 static irqreturn_t igb_intr(int irq, void *data)
5510 {
5511 struct igb_adapter *adapter = data;
5512 struct igb_q_vector *q_vector = adapter->q_vector[0];
5513 struct e1000_hw *hw = &adapter->hw;
5514 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5515 * need for the IMC write */
5516 u32 icr = rd32(E1000_ICR);
5517 if (!icr)
5518 return IRQ_NONE; /* Not our interrupt */
5519
5520 igb_write_itr(q_vector);
5521
5522 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5523 * not set, then the adapter didn't send an interrupt */
5524 if (!(icr & E1000_ICR_INT_ASSERTED))
5525 return IRQ_NONE;
5526
5527 if (icr & E1000_ICR_DRSTA)
5528 schedule_work(&adapter->reset_task);
5529
5530 if (icr & E1000_ICR_DOUTSYNC) {
5531 /* HW is reporting DMA is out of sync */
5532 adapter->stats.doosync++;
5533 }
5534
5535 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5536 hw->mac.get_link_status = 1;
5537 /* guard against interrupt when we're going down */
5538 if (!test_bit(__IGB_DOWN, &adapter->state))
5539 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5540 }
5541
5542 napi_schedule(&q_vector->napi);
5543
5544 return IRQ_HANDLED;
5545 }
5546
5547 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5548 {
5549 struct igb_adapter *adapter = q_vector->adapter;
5550 struct e1000_hw *hw = &adapter->hw;
5551
5552 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5553 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5554 if (!adapter->msix_entries)
5555 igb_set_itr(adapter);
5556 else
5557 igb_update_ring_itr(q_vector);
5558 }
5559
5560 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5561 if (adapter->msix_entries)
5562 wr32(E1000_EIMS, q_vector->eims_value);
5563 else
5564 igb_irq_enable(adapter);
5565 }
5566 }
5567
5568 /**
5569 * igb_poll - NAPI Rx polling callback
5570 * @napi: napi polling structure
5571 * @budget: count of how many packets we should handle
5572 **/
5573 static int igb_poll(struct napi_struct *napi, int budget)
5574 {
5575 struct igb_q_vector *q_vector = container_of(napi,
5576 struct igb_q_vector,
5577 napi);
5578 bool clean_complete = true;
5579
5580 #ifdef CONFIG_IGB_DCA
5581 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5582 igb_update_dca(q_vector);
5583 #endif
5584 if (q_vector->tx_ring)
5585 clean_complete = igb_clean_tx_irq(q_vector);
5586
5587 if (q_vector->rx_ring)
5588 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5589
5590 /* If all work not completed, return budget and keep polling */
5591 if (!clean_complete)
5592 return budget;
5593
5594 /* If not enough Rx work done, exit the polling mode */
5595 napi_complete(napi);
5596 igb_ring_irq_enable(q_vector);
5597
5598 return 0;
5599 }
5600
5601 /**
5602 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5603 * @adapter: board private structure
5604 * @shhwtstamps: timestamp structure to update
5605 * @regval: unsigned 64bit system time value.
5606 *
5607 * We need to convert the system time value stored in the RX/TXSTMP registers
5608 * into a hwtstamp which can be used by the upper level timestamping functions
5609 */
5610 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5611 struct skb_shared_hwtstamps *shhwtstamps,
5612 u64 regval)
5613 {
5614 u64 ns;
5615
5616 /*
5617 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5618 * 24 to match clock shift we setup earlier.
5619 */
5620 if (adapter->hw.mac.type == e1000_82580)
5621 regval <<= IGB_82580_TSYNC_SHIFT;
5622
5623 ns = timecounter_cyc2time(&adapter->clock, regval);
5624 timecompare_update(&adapter->compare, ns);
5625 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5626 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5627 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5628 }
5629
5630 /**
5631 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5632 * @q_vector: pointer to q_vector containing needed info
5633 * @buffer_info: pointer to igb_tx_buffer structure
5634 *
5635 * If we were asked to do hardware stamping and such a time stamp is
5636 * available, then it must have been for this skb here because we
5637 * allow only one such packet into the queue.
5638 */
5639 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5640 struct igb_tx_buffer *buffer_info)
5641 {
5642 struct igb_adapter *adapter = q_vector->adapter;
5643 struct e1000_hw *hw = &adapter->hw;
5644 struct skb_shared_hwtstamps shhwtstamps;
5645 u64 regval;
5646
5647 /* if skb does not support hw timestamp or TX stamp not valid exit */
5648 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5649 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5650 return;
5651
5652 regval = rd32(E1000_TXSTMPL);
5653 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5654
5655 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5656 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5657 }
5658
5659 /**
5660 * igb_clean_tx_irq - Reclaim resources after transmit completes
5661 * @q_vector: pointer to q_vector containing needed info
5662 * returns true if ring is completely cleaned
5663 **/
5664 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5665 {
5666 struct igb_adapter *adapter = q_vector->adapter;
5667 struct igb_ring *tx_ring = q_vector->tx_ring;
5668 struct igb_tx_buffer *tx_buffer;
5669 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5670 unsigned int total_bytes = 0, total_packets = 0;
5671 unsigned int budget = q_vector->tx_work_limit;
5672 unsigned int i = tx_ring->next_to_clean;
5673
5674 if (test_bit(__IGB_DOWN, &adapter->state))
5675 return true;
5676
5677 tx_buffer = &tx_ring->tx_buffer_info[i];
5678 tx_desc = IGB_TX_DESC(tx_ring, i);
5679 i -= tx_ring->count;
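/* i is biased by -tx_ring->count here so that the ring wrap
 * checks below reduce to a simple test for i reaching zero
 */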
5680
5681 for (; budget; budget--) {
5682 eop_desc = tx_buffer->next_to_watch;
5683
5684 /* prevent any other reads prior to eop_desc */
5685 rmb();
5686
5687 /* if next_to_watch is not set then there is no work pending */
5688 if (!eop_desc)
5689 break;
5690
5691 /* if DD is not set pending work has not been completed */
5692 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5693 break;
5694
5695 /* clear next_to_watch to prevent false hangs */
5696 tx_buffer->next_to_watch = NULL;
5697
5698 /* update the statistics for this packet */
5699 total_bytes += tx_buffer->bytecount;
5700 total_packets += tx_buffer->gso_segs;
5701
5702 /* retrieve hardware timestamp */
5703 igb_tx_hwtstamp(q_vector, tx_buffer);
5704
5705 /* free the skb */
5706 dev_kfree_skb_any(tx_buffer->skb);
5707 tx_buffer->skb = NULL;
5708
5709 /* unmap skb header data */
5710 dma_unmap_single(tx_ring->dev,
5711 tx_buffer->dma,
5712 tx_buffer->length,
5713 DMA_TO_DEVICE);
5714
5715 /* clear last DMA location and unmap remaining buffers */
5716 while (tx_desc != eop_desc) {
5717 tx_buffer->dma = 0;
5718
5719 tx_buffer++;
5720 tx_desc++;
5721 i++;
5722 if (unlikely(!i)) {
5723 i -= tx_ring->count;
5724 tx_buffer = tx_ring->tx_buffer_info;
5725 tx_desc = IGB_TX_DESC(tx_ring, 0);
5726 }
5727
5728 /* unmap any remaining paged data */
5729 if (tx_buffer->dma) {
5730 dma_unmap_page(tx_ring->dev,
5731 tx_buffer->dma,
5732 tx_buffer->length,
5733 DMA_TO_DEVICE);
5734 }
5735 }
5736
5737 /* clear last DMA location */
5738 tx_buffer->dma = 0;
5739
5740 /* move us one more past the eop_desc for start of next pkt */
5741 tx_buffer++;
5742 tx_desc++;
5743 i++;
5744 if (unlikely(!i)) {
5745 i -= tx_ring->count;
5746 tx_buffer = tx_ring->tx_buffer_info;
5747 tx_desc = IGB_TX_DESC(tx_ring, 0);
5748 }
5749 }
5750
5751 i += tx_ring->count;
5752 tx_ring->next_to_clean = i;
5753 u64_stats_update_begin(&tx_ring->tx_syncp);
5754 tx_ring->tx_stats.bytes += total_bytes;
5755 tx_ring->tx_stats.packets += total_packets;
5756 u64_stats_update_end(&tx_ring->tx_syncp);
5757 tx_ring->total_bytes += total_bytes;
5758 tx_ring->total_packets += total_packets;
5759
5760 if (tx_ring->detect_tx_hung) {
5761 struct e1000_hw *hw = &adapter->hw;
5762
5763 eop_desc = tx_buffer->next_to_watch;
5764
5765 * Detect a transmit hang in hardware; this serializes the
5766 * check with the clearing of time_stamp and movement of i */
5767 tx_ring->detect_tx_hung = false;
5768 if (eop_desc &&
5769 time_after(jiffies, tx_buffer->time_stamp +
5770 (adapter->tx_timeout_factor * HZ)) &&
5771 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5772
5773 /* detected Tx unit hang */
5774 dev_err(tx_ring->dev,
5775 "Detected Tx Unit Hang\n"
5776 " Tx Queue <%d>\n"
5777 " TDH <%x>\n"
5778 " TDT <%x>\n"
5779 " next_to_use <%x>\n"
5780 " next_to_clean <%x>\n"
5781 "buffer_info[next_to_clean]\n"
5782 " time_stamp <%lx>\n"
5783 " next_to_watch <%p>\n"
5784 " jiffies <%lx>\n"
5785 " desc.status <%x>\n",
5786 tx_ring->queue_index,
5787 rd32(E1000_TDH(tx_ring->reg_idx)),
5788 readl(tx_ring->tail),
5789 tx_ring->next_to_use,
5790 tx_ring->next_to_clean,
5791 tx_buffer->time_stamp,
5792 eop_desc,
5793 jiffies,
5794 eop_desc->wb.status);
5795 netif_stop_subqueue(tx_ring->netdev,
5796 tx_ring->queue_index);
5797
5798 /* we are about to reset, no point in enabling stuff */
5799 return true;
5800 }
5801 }
5802
5803 if (unlikely(total_packets &&
5804 netif_carrier_ok(tx_ring->netdev) &&
5805 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5806 /* Make sure that anybody stopping the queue after this
5807 * sees the new next_to_clean.
5808 */
5809 smp_mb();
5810 if (__netif_subqueue_stopped(tx_ring->netdev,
5811 tx_ring->queue_index) &&
5812 !(test_bit(__IGB_DOWN, &adapter->state))) {
5813 netif_wake_subqueue(tx_ring->netdev,
5814 tx_ring->queue_index);
5815
5816 u64_stats_update_begin(&tx_ring->tx_syncp);
5817 tx_ring->tx_stats.restart_queue++;
5818 u64_stats_update_end(&tx_ring->tx_syncp);
5819 }
5820 }
5821
5822 return !!budget;
5823 }
5824
5825 static inline void igb_rx_checksum(struct igb_ring *ring,
5826 u32 status_err, struct sk_buff *skb)
5827 {
5828 skb_checksum_none_assert(skb);
5829
5830 /* bail if Rx checksumming is disabled via ethtool or the Ignore Checksum (IXSM) bit is set */
5831 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5832 (status_err & E1000_RXD_STAT_IXSM))
5833 return;
5834
5835 /* TCP/UDP checksum error bit is set */
5836 if (status_err &
5837 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5838 /*
5839 * work around errata with sctp packets where the TCPE aka
5840 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5841 * packets, (aka let the stack check the crc32c)
5842 */
5843 if ((skb->len == 60) &&
5844 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5845 u64_stats_update_begin(&ring->rx_syncp);
5846 ring->rx_stats.csum_err++;
5847 u64_stats_update_end(&ring->rx_syncp);
5848 }
5849 /* let the stack verify checksum errors */
5850 return;
5851 }
5852 /* It must be a TCP or UDP packet with a valid checksum */
5853 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5854 skb->ip_summed = CHECKSUM_UNNECESSARY;
5855
5856 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5857 }
5858
5859 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5860 struct sk_buff *skb)
5861 {
5862 struct igb_adapter *adapter = q_vector->adapter;
5863 struct e1000_hw *hw = &adapter->hw;
5864 u64 regval;
5865
5866 /*
5867 * If this bit is set, then the RX registers contain the time stamp. No
5868 * other packet will be time stamped until we read these registers, so
5869 * read the registers to make them available again. Because only one
5870 * packet can be time stamped at a time, we know that the register
5871 * values must belong to this one here and therefore we don't need to
5872 * compare any of the additional attributes stored for it.
5873 *
5874 * If nothing went wrong, then it should have a shared tx_flags that we
5875 * can turn into a skb_shared_hwtstamps.
5876 */
5877 if (staterr & E1000_RXDADV_STAT_TSIP) {
5878 u32 *stamp = (u32 *)skb->data;
5879 regval = le32_to_cpu(*(stamp + 2));
5880 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5881 skb_pull(skb, IGB_TS_HDR_LEN);
5882 } else {
5883 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5884 return;
5885
5886 regval = rd32(E1000_RXSTMPL);
5887 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5888 }
5889
5890 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5891 }
5892 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5893 {
5894 /* HW will not DMA in data larger than the given buffer, even if it
5895 * parses the (NFS, of course) header to be larger. In that case, it
5896 * fills the header buffer and spills the rest into the page.
5897 */
5898 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5899 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5900 if (hlen > IGB_RX_HDR_LEN)
5901 hlen = IGB_RX_HDR_LEN;
5902 return hlen;
5903 }
5904
5905 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5906 {
5907 struct igb_ring *rx_ring = q_vector->rx_ring;
5908 union e1000_adv_rx_desc *rx_desc;
5909 const int current_node = numa_node_id();
5910 unsigned int total_bytes = 0, total_packets = 0;
5911 u32 staterr;
5912 u16 cleaned_count = igb_desc_unused(rx_ring);
5913 u16 i = rx_ring->next_to_clean;
5914
5915 rx_desc = IGB_RX_DESC(rx_ring, i);
5916 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5917
5918 while (staterr & E1000_RXD_STAT_DD) {
5919 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5920 struct sk_buff *skb = buffer_info->skb;
5921 union e1000_adv_rx_desc *next_rxd;
5922
5923 buffer_info->skb = NULL;
5924 prefetch(skb->data);
5925
5926 i++;
5927 if (i == rx_ring->count)
5928 i = 0;
5929
5930 next_rxd = IGB_RX_DESC(rx_ring, i);
5931 prefetch(next_rxd);
5932
5933 /*
5934 * This memory barrier is needed to keep us from reading
5935 * any other fields out of the rx_desc until we know the
5936 * RXD_STAT_DD bit is set
5937 */
5938 rmb();
5939
5940 if (!skb_is_nonlinear(skb)) {
5941 __skb_put(skb, igb_get_hlen(rx_desc));
5942 dma_unmap_single(rx_ring->dev, buffer_info->dma,
5943 IGB_RX_HDR_LEN,
5944 DMA_FROM_DEVICE);
5945 buffer_info->dma = 0;
5946 }
5947
5948 if (rx_desc->wb.upper.length) {
5949 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5950
5951 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5952 buffer_info->page,
5953 buffer_info->page_offset,
5954 length);
5955
5956 skb->len += length;
5957 skb->data_len += length;
5958 skb->truesize += length;
5959
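/* decide whether to recycle this page: keep it (taking an extra
 * reference so the other half can be reused) only if we hold the
 * sole reference and the page is local to this NUMA node; otherwise
 * drop our pointer and let a fresh page be allocated later
 */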
5960 if ((page_count(buffer_info->page) != 1) ||
5961 (page_to_nid(buffer_info->page) != current_node))
5962 buffer_info->page = NULL;
5963 else
5964 get_page(buffer_info->page);
5965
5966 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5967 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5968 buffer_info->page_dma = 0;
5969 }
5970
5971 if (!(staterr & E1000_RXD_STAT_EOP)) {
5972 struct igb_rx_buffer *next_buffer;
5973 next_buffer = &rx_ring->rx_buffer_info[i];
5974 buffer_info->skb = next_buffer->skb;
5975 buffer_info->dma = next_buffer->dma;
5976 next_buffer->skb = skb;
5977 next_buffer->dma = 0;
5978 goto next_desc;
5979 }
5980
5981 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5982 dev_kfree_skb_any(skb);
5983 goto next_desc;
5984 }
5985
5986 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5987 igb_rx_hwtstamp(q_vector, staterr, skb);
5988 total_bytes += skb->len;
5989 total_packets++;
5990
5991 igb_rx_checksum(rx_ring, staterr, skb);
5992
5993 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5994
5995 if (staterr & E1000_RXD_STAT_VP) {
5996 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5997
5998 __vlan_hwaccel_put_tag(skb, vid);
5999 }
6000 napi_gro_receive(&q_vector->napi, skb);
6001
6002 budget--;
6003 next_desc:
6004 if (!budget)
6005 break;
6006
6007 cleaned_count++;
6008 /* return some buffers to hardware, one at a time is too slow */
6009 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6010 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6011 cleaned_count = 0;
6012 }
6013
6014 /* use prefetched values */
6015 rx_desc = next_rxd;
6016 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
6017 }
6018
6019 rx_ring->next_to_clean = i;
6020 u64_stats_update_begin(&rx_ring->rx_syncp);
6021 rx_ring->rx_stats.packets += total_packets;
6022 rx_ring->rx_stats.bytes += total_bytes;
6023 u64_stats_update_end(&rx_ring->rx_syncp);
6024 rx_ring->total_packets += total_packets;
6025 rx_ring->total_bytes += total_bytes;
6026
6027 if (cleaned_count)
6028 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6029
6030 return !!budget;
6031 }
6032
6033 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6034 struct igb_rx_buffer *bi)
6035 {
6036 struct sk_buff *skb = bi->skb;
6037 dma_addr_t dma = bi->dma;
6038
6039 if (dma)
6040 return true;
6041
6042 if (likely(!skb)) {
6043 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6044 IGB_RX_HDR_LEN);
6045 bi->skb = skb;
6046 if (!skb) {
6047 rx_ring->rx_stats.alloc_failed++;
6048 return false;
6049 }
6050
6051 /* initialize skb for ring */
6052 skb_record_rx_queue(skb, rx_ring->queue_index);
6053 }
6054
6055 dma = dma_map_single(rx_ring->dev, skb->data,
6056 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6057
6058 if (dma_mapping_error(rx_ring->dev, dma)) {
6059 rx_ring->rx_stats.alloc_failed++;
6060 return false;
6061 }
6062
6063 bi->dma = dma;
6064 return true;
6065 }
6066
6067 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6068 struct igb_rx_buffer *bi)
6069 {
6070 struct page *page = bi->page;
6071 dma_addr_t page_dma = bi->page_dma;
6072 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
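/* XOR with half the page size alternates between the two halves of the page on each refill */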
6073
6074 if (page_dma)
6075 return true;
6076
6077 if (!page) {
6078 page = netdev_alloc_page(rx_ring->netdev);
6079 bi->page = page;
6080 if (unlikely(!page)) {
6081 rx_ring->rx_stats.alloc_failed++;
6082 return false;
6083 }
6084 }
6085
6086 page_dma = dma_map_page(rx_ring->dev, page,
6087 page_offset, PAGE_SIZE / 2,
6088 DMA_FROM_DEVICE);
6089
6090 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6091 rx_ring->rx_stats.alloc_failed++;
6092 return false;
6093 }
6094
6095 bi->page_dma = page_dma;
6096 bi->page_offset = page_offset;
6097 return true;
6098 }
6099
6100 /**
6101 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6102 * @rx_ring: rx descriptor ring to allocate new receive buffers for
 * @cleaned_count: number of buffers to allocate
6103 **/
6104 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6105 {
6106 union e1000_adv_rx_desc *rx_desc;
6107 struct igb_rx_buffer *bi;
6108 u16 i = rx_ring->next_to_use;
6109
6110 rx_desc = IGB_RX_DESC(rx_ring, i);
6111 bi = &rx_ring->rx_buffer_info[i];
6112 i -= rx_ring->count;
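/* bias the index negative so the ring wrap check below is just a test against zero */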
6113
6114 while (cleaned_count--) {
6115 if (!igb_alloc_mapped_skb(rx_ring, bi))
6116 break;
6117
6118 /* Refresh the desc even if buffer_addrs didn't change
6119 * because each write-back erases this info. */
6120 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6121
6122 if (!igb_alloc_mapped_page(rx_ring, bi))
6123 break;
6124
6125 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6126
6127 rx_desc++;
6128 bi++;
6129 i++;
6130 if (unlikely(!i)) {
6131 rx_desc = IGB_RX_DESC(rx_ring, 0);
6132 bi = rx_ring->rx_buffer_info;
6133 i -= rx_ring->count;
6134 }
6135
6136 /* clear the hdr_addr for the next_to_use descriptor */
6137 rx_desc->read.hdr_addr = 0;
6138 }
6139
6140 i += rx_ring->count;
6141
6142 if (rx_ring->next_to_use != i) {
6143 rx_ring->next_to_use = i;
6144
6145 /* Force memory writes to complete before letting h/w
6146 * know there are new descriptors to fetch. (Only
6147 * applicable for weak-ordered memory model archs,
6148 * such as IA-64). */
6149 wmb();
6150 writel(i, rx_ring->tail);
6151 }
6152 }
6153
6154 /**
6155 * igb_mii_ioctl - handle MII register ioctls
6156 * @netdev: network interface device structure
6157 * @ifr: pointer to the interface request structure
6158 * @cmd: ioctl command
6159 **/
6160 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6161 {
6162 struct igb_adapter *adapter = netdev_priv(netdev);
6163 struct mii_ioctl_data *data = if_mii(ifr);
6164
6165 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6166 return -EOPNOTSUPP;
6167
6168 switch (cmd) {
6169 case SIOCGMIIPHY:
6170 data->phy_id = adapter->hw.phy.addr;
6171 break;
6172 case SIOCGMIIREG:
6173 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6174 &data->val_out))
6175 return -EIO;
6176 break;
6177 case SIOCSMIIREG:
6178 default:
6179 return -EOPNOTSUPP;
6180 }
6181 return 0;
6182 }
6183
6184 /**
6185 * igb_hwtstamp_ioctl - control hardware time stamping
6186 * @netdev: network interface device structure
6187 * @ifr: pointer to the interface request structure
6188 * @cmd: ioctl command
6189 *
6190 * Outgoing time stamping can be enabled and disabled. Play nice and
6191 * disable it when requested, although it shouldn't cause any overhead
6192 * when no packet needs it. At most one packet in the queue may be
6193 * marked for time stamping, otherwise it would be impossible to tell
6194 * for sure to which packet the hardware time stamp belongs.
6195 *
6196 * Incoming time stamping has to be configured via the hardware
6197 * filters. Not all combinations are supported, in particular event
6198 * type has to be specified. Matching the kind of event packet is
6199 * not supported, with the exception of "all V2 events regardless of
6200 * level 2 or 4".
6201 *
6202 **/
6203 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6204 struct ifreq *ifr, int cmd)
6205 {
6206 struct igb_adapter *adapter = netdev_priv(netdev);
6207 struct e1000_hw *hw = &adapter->hw;
6208 struct hwtstamp_config config;
6209 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6210 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6211 u32 tsync_rx_cfg = 0;
6212 bool is_l4 = false;
6213 bool is_l2 = false;
6214 u32 regval;
6215
6216 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6217 return -EFAULT;
6218
6219 /* reserved for future extensions */
6220 if (config.flags)
6221 return -EINVAL;
6222
6223 switch (config.tx_type) {
6224 case HWTSTAMP_TX_OFF:
6225 tsync_tx_ctl = 0;
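/* fall through */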
6226 case HWTSTAMP_TX_ON:
6227 break;
6228 default:
6229 return -ERANGE;
6230 }
6231
6232 switch (config.rx_filter) {
6233 case HWTSTAMP_FILTER_NONE:
6234 tsync_rx_ctl = 0;
6235 break;
6236 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6237 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6238 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6239 case HWTSTAMP_FILTER_ALL:
6240 /*
6241 * register TSYNCRXCFG must be set, therefore it is not
6242 * possible to time stamp both Sync and Delay_Req messages
6243 * => fall back to time stamping all packets
6244 */
6245 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6246 config.rx_filter = HWTSTAMP_FILTER_ALL;
6247 break;
6248 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6249 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6250 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6251 is_l4 = true;
6252 break;
6253 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6254 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6255 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6256 is_l4 = true;
6257 break;
6258 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6259 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6260 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6261 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6262 is_l2 = true;
6263 is_l4 = true;
6264 config.rx_filter = HWTSTAMP_FILTER_SOME;
6265 break;
6266 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6267 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6268 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6269 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6270 is_l2 = true;
6271 is_l4 = true;
6272 config.rx_filter = HWTSTAMP_FILTER_SOME;
6273 break;
6274 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6275 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6276 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6277 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6278 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6279 is_l2 = true;
6280 break;
6281 default:
6282 return -ERANGE;
6283 }
6284
6285 if (hw->mac.type == e1000_82575) {
6286 if (tsync_rx_ctl | tsync_tx_ctl)
6287 return -EINVAL;
6288 return 0;
6289 }
6290
6291 /*
6292 * Per-packet timestamping only works if all packets are
6293 * timestamped, so enable timestamping in all packets as
6294 * long as one rx filter was configured.
6295 */
6296 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6297 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6298 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6299 }
6300
6301 /* enable/disable TX */
6302 regval = rd32(E1000_TSYNCTXCTL);
6303 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6304 regval |= tsync_tx_ctl;
6305 wr32(E1000_TSYNCTXCTL, regval);
6306
6307 /* enable/disable RX */
6308 regval = rd32(E1000_TSYNCRXCTL);
6309 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6310 regval |= tsync_rx_ctl;
6311 wr32(E1000_TSYNCRXCTL, regval);
6312
6313 /* define which PTP packets are time stamped */
6314 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6315
6316 /* define ethertype filter for timestamped packets */
6317 if (is_l2)
6318 wr32(E1000_ETQF(3),
6319 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6320 E1000_ETQF_1588 | /* enable timestamping */
6321 ETH_P_1588)); /* 1588 eth protocol type */
6322 else
6323 wr32(E1000_ETQF(3), 0);
6324
6325 #define PTP_PORT 319
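/* UDP port 319 is the IANA-assigned port for PTP event messages */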
6326 /* L4 Queue Filter[3]: filter by destination port and protocol */
6327 if (is_l4) {
6328 u32 ftqf = (IPPROTO_UDP /* UDP */
6329 | E1000_FTQF_VF_BP /* VF not compared */
6330 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6331 | E1000_FTQF_MASK); /* mask all inputs */
6332 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6333
6334 wr32(E1000_IMIR(3), htons(PTP_PORT));
6335 wr32(E1000_IMIREXT(3),
6336 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6337 if (hw->mac.type == e1000_82576) {
6338 /* enable source port check */
6339 wr32(E1000_SPQF(3), htons(PTP_PORT));
6340 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6341 }
6342 wr32(E1000_FTQF(3), ftqf);
6343 } else {
6344 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6345 }
6346 wrfl();
6347
6348 adapter->hwtstamp_config = config;
6349
6350 /* clear TX/RX time stamp registers, just to be sure */
6351 regval = rd32(E1000_TXSTMPH);
6352 regval = rd32(E1000_RXSTMPH);
6353
6354 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6355 -EFAULT : 0;
6356 }
6357
6358 /**
6359 * igb_ioctl - handle device-specific ioctl calls
6360 * @netdev: network interface device structure
6361 * @ifr: pointer to the interface request structure
6362 * @cmd: ioctl command
6363 **/
6364 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6365 {
6366 switch (cmd) {
6367 case SIOCGMIIPHY:
6368 case SIOCGMIIREG:
6369 case SIOCSMIIREG:
6370 return igb_mii_ioctl(netdev, ifr, cmd);
6371 case SIOCSHWTSTAMP:
6372 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6373 default:
6374 return -EOPNOTSUPP;
6375 }
6376 }
6377
6378 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6379 {
6380 struct igb_adapter *adapter = hw->back;
6381 u16 cap_offset;
6382
6383 cap_offset = adapter->pdev->pcie_cap;
6384 if (!cap_offset)
6385 return -E1000_ERR_CONFIG;
6386
6387 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6388
6389 return 0;
6390 }
6391
6392 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6393 {
6394 struct igb_adapter *adapter = hw->back;
6395 u16 cap_offset;
6396
6397 cap_offset = adapter->pdev->pcie_cap;
6398 if (!cap_offset)
6399 return -E1000_ERR_CONFIG;
6400
6401 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6402
6403 return 0;
6404 }
6405
6406 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6407 {
6408 struct igb_adapter *adapter = netdev_priv(netdev);
6409 struct e1000_hw *hw = &adapter->hw;
6410 u32 ctrl, rctl;
6411
6412 igb_irq_disable(adapter);
6413
6414 if (features & NETIF_F_HW_VLAN_RX) {
6415 /* enable VLAN tag insert/strip */
6416 ctrl = rd32(E1000_CTRL);
6417 ctrl |= E1000_CTRL_VME;
6418 wr32(E1000_CTRL, ctrl);
6419
6420 /* Disable CFI check */
6421 rctl = rd32(E1000_RCTL);
6422 rctl &= ~E1000_RCTL_CFIEN;
6423 wr32(E1000_RCTL, rctl);
6424 } else {
6425 /* disable VLAN tag insert/strip */
6426 ctrl = rd32(E1000_CTRL);
6427 ctrl &= ~E1000_CTRL_VME;
6428 wr32(E1000_CTRL, ctrl);
6429 }
6430
6431 igb_rlpml_set(adapter);
6432
6433 if (!test_bit(__IGB_DOWN, &adapter->state))
6434 igb_irq_enable(adapter);
6435 }
6436
6437 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6438 {
6439 struct igb_adapter *adapter = netdev_priv(netdev);
6440 struct e1000_hw *hw = &adapter->hw;
6441 int pf_id = adapter->vfs_allocated_count;
6442
6443 /* attempt to add filter to vlvf array */
6444 igb_vlvf_set(adapter, vid, true, pf_id);
6445
6446 /* add the filter since PF can receive vlans w/o entry in vlvf */
6447 igb_vfta_set(hw, vid, true);
6448
6449 set_bit(vid, adapter->active_vlans);
6450 }
6451
6452 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6453 {
6454 struct igb_adapter *adapter = netdev_priv(netdev);
6455 struct e1000_hw *hw = &adapter->hw;
6456 int pf_id = adapter->vfs_allocated_count;
6457 s32 err;
6458
6459 igb_irq_disable(adapter);
6460
6461 if (!test_bit(__IGB_DOWN, &adapter->state))
6462 igb_irq_enable(adapter);
6463
6464 /* remove vlan from VLVF table array */
6465 err = igb_vlvf_set(adapter, vid, false, pf_id);
6466
6467 /* if vid was not present in VLVF just remove it from table */
6468 if (err)
6469 igb_vfta_set(hw, vid, false);
6470
6471 clear_bit(vid, adapter->active_vlans);
6472 }
6473
6474 static void igb_restore_vlan(struct igb_adapter *adapter)
6475 {
6476 u16 vid;
6477
6478 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6479 igb_vlan_rx_add_vid(adapter->netdev, vid);
6480 }
6481
6482 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6483 {
6484 struct pci_dev *pdev = adapter->pdev;
6485 struct e1000_mac_info *mac = &adapter->hw.mac;
6486
6487 mac->autoneg = 0;
6488
6489 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6490 * for the switch() below to work */
6491 if ((spd & 1) || (dplx & ~1))
6492 goto err_inval;
6493
6494 /* Fiber NICs only allow 1000 Mbps full duplex */
6495 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6496 spd != SPEED_1000 &&
6497 dplx != DUPLEX_FULL)
6498 goto err_inval;
6499
6500 switch (spd + dplx) {
6501 case SPEED_10 + DUPLEX_HALF:
6502 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6503 break;
6504 case SPEED_10 + DUPLEX_FULL:
6505 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6506 break;
6507 case SPEED_100 + DUPLEX_HALF:
6508 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6509 break;
6510 case SPEED_100 + DUPLEX_FULL:
6511 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6512 break;
6513 case SPEED_1000 + DUPLEX_FULL:
6514 mac->autoneg = 1;
6515 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6516 break;
6517 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6518 default:
6519 goto err_inval;
6520 }
6521 return 0;
6522
6523 err_inval:
6524 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6525 return -EINVAL;
6526 }
6527
6528 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6529 {
6530 struct net_device *netdev = pci_get_drvdata(pdev);
6531 struct igb_adapter *adapter = netdev_priv(netdev);
6532 struct e1000_hw *hw = &adapter->hw;
6533 u32 ctrl, rctl, status;
6534 u32 wufc = adapter->wol;
6535 #ifdef CONFIG_PM
6536 int retval = 0;
6537 #endif
6538
6539 netif_device_detach(netdev);
6540
6541 if (netif_running(netdev))
6542 igb_close(netdev);
6543
6544 igb_clear_interrupt_scheme(adapter);
6545
6546 #ifdef CONFIG_PM
6547 retval = pci_save_state(pdev);
6548 if (retval)
6549 return retval;
6550 #endif
6551
6552 status = rd32(E1000_STATUS);
6553 if (status & E1000_STATUS_LU)
6554 wufc &= ~E1000_WUFC_LNKC;
6555
6556 if (wufc) {
6557 igb_setup_rctl(adapter);
6558 igb_set_rx_mode(netdev);
6559
6560 /* turn on all-multi mode if wake on multicast is enabled */
6561 if (wufc & E1000_WUFC_MC) {
6562 rctl = rd32(E1000_RCTL);
6563 rctl |= E1000_RCTL_MPE;
6564 wr32(E1000_RCTL, rctl);
6565 }
6566
6567 ctrl = rd32(E1000_CTRL);
6568 /* advertise wake from D3Cold */
6569 #define E1000_CTRL_ADVD3WUC 0x00100000
6570 /* phy power management enable */
6571 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6572 ctrl |= E1000_CTRL_ADVD3WUC;
6573 wr32(E1000_CTRL, ctrl);
6574
6575 /* Allow time for pending master requests to run */
6576 igb_disable_pcie_master(hw);
6577
6578 wr32(E1000_WUC, E1000_WUC_PME_EN);
6579 wr32(E1000_WUFC, wufc);
6580 } else {
6581 wr32(E1000_WUC, 0);
6582 wr32(E1000_WUFC, 0);
6583 }
6584
6585 *enable_wake = wufc || adapter->en_mng_pt;
6586 if (!*enable_wake)
6587 igb_power_down_link(adapter);
6588 else
6589 igb_power_up_link(adapter);
6590
6591 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6592 * would have already happened in close and is redundant. */
6593 igb_release_hw_control(adapter);
6594
6595 pci_disable_device(pdev);
6596
6597 return 0;
6598 }
6599
6600 #ifdef CONFIG_PM
6601 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6602 {
6603 int retval;
6604 bool wake;
6605
6606 retval = __igb_shutdown(pdev, &wake);
6607 if (retval)
6608 return retval;
6609
6610 if (wake) {
6611 pci_prepare_to_sleep(pdev);
6612 } else {
6613 pci_wake_from_d3(pdev, false);
6614 pci_set_power_state(pdev, PCI_D3hot);
6615 }
6616
6617 return 0;
6618 }
6619
6620 static int igb_resume(struct pci_dev *pdev)
6621 {
6622 struct net_device *netdev = pci_get_drvdata(pdev);
6623 struct igb_adapter *adapter = netdev_priv(netdev);
6624 struct e1000_hw *hw = &adapter->hw;
6625 u32 err;
6626
6627 pci_set_power_state(pdev, PCI_D0);
6628 pci_restore_state(pdev);
6629 pci_save_state(pdev);
6630
6631 err = pci_enable_device_mem(pdev);
6632 if (err) {
6633 dev_err(&pdev->dev,
6634 "igb: Cannot enable PCI device from suspend\n");
6635 return err;
6636 }
6637 pci_set_master(pdev);
6638
6639 pci_enable_wake(pdev, PCI_D3hot, 0);
6640 pci_enable_wake(pdev, PCI_D3cold, 0);
6641
6642 if (igb_init_interrupt_scheme(adapter)) {
6643 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6644 return -ENOMEM;
6645 }
6646
6647 igb_reset(adapter);
6648
6649 /* let the f/w know that the h/w is now under the control of the
6650 * driver. */
6651 igb_get_hw_control(adapter);
6652
6653 wr32(E1000_WUS, ~0);
6654
6655 if (netif_running(netdev)) {
6656 err = igb_open(netdev);
6657 if (err)
6658 return err;
6659 }
6660
6661 netif_device_attach(netdev);
6662
6663 return 0;
6664 }
6665 #endif
6666
6667 static void igb_shutdown(struct pci_dev *pdev)
6668 {
6669 bool wake;
6670
6671 __igb_shutdown(pdev, &wake);
6672
6673 if (system_state == SYSTEM_POWER_OFF) {
6674 pci_wake_from_d3(pdev, wake);
6675 pci_set_power_state(pdev, PCI_D3hot);
6676 }
6677 }
6678
6679 #ifdef CONFIG_NET_POLL_CONTROLLER
6680 /*
6681 * Polling 'interrupt' - used by things like netconsole to send skbs
6682 * without having to re-enable interrupts. It's not called while
6683 * the interrupt routine is executing.
6684 */
6685 static void igb_netpoll(struct net_device *netdev)
6686 {
6687 struct igb_adapter *adapter = netdev_priv(netdev);
6688 struct e1000_hw *hw = &adapter->hw;
6689 int i;
6690
6691 if (!adapter->msix_entries) {
6692 struct igb_q_vector *q_vector = adapter->q_vector[0];
6693 igb_irq_disable(adapter);
6694 napi_schedule(&q_vector->napi);
6695 return;
6696 }
6697
6698 for (i = 0; i < adapter->num_q_vectors; i++) {
6699 struct igb_q_vector *q_vector = adapter->q_vector[i];
6700 wr32(E1000_EIMC, q_vector->eims_value);
6701 napi_schedule(&q_vector->napi);
6702 }
6703 }
6704 #endif /* CONFIG_NET_POLL_CONTROLLER */
6705
6706 /**
6707 * igb_io_error_detected - called when PCI error is detected
6708 * @pdev: Pointer to PCI device
6709 * @state: The current pci connection state
6710 *
6711 * This function is called after a PCI bus error affecting
6712 * this device has been detected.
6713 */
6714 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6715 pci_channel_state_t state)
6716 {
6717 struct net_device *netdev = pci_get_drvdata(pdev);
6718 struct igb_adapter *adapter = netdev_priv(netdev);
6719
6720 netif_device_detach(netdev);
6721
6722 if (state == pci_channel_io_perm_failure)
6723 return PCI_ERS_RESULT_DISCONNECT;
6724
6725 if (netif_running(netdev))
6726 igb_down(adapter);
6727 pci_disable_device(pdev);
6728
6729 /* Request a slot reset. */
6730 return PCI_ERS_RESULT_NEED_RESET;
6731 }
6732
6733 /**
6734 * igb_io_slot_reset - called after the pci bus has been reset.
6735 * @pdev: Pointer to PCI device
6736 *
6737 * Restart the card from scratch, as if from a cold-boot. Implementation
6738 * resembles the first-half of the igb_resume routine.
6739 */
6740 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6741 {
6742 struct net_device *netdev = pci_get_drvdata(pdev);
6743 struct igb_adapter *adapter = netdev_priv(netdev);
6744 struct e1000_hw *hw = &adapter->hw;
6745 pci_ers_result_t result;
6746 int err;
6747
6748 if (pci_enable_device_mem(pdev)) {
6749 dev_err(&pdev->dev,
6750 "Cannot re-enable PCI device after reset.\n");
6751 result = PCI_ERS_RESULT_DISCONNECT;
6752 } else {
6753 pci_set_master(pdev);
6754 pci_restore_state(pdev);
6755 pci_save_state(pdev);
6756
6757 pci_enable_wake(pdev, PCI_D3hot, 0);
6758 pci_enable_wake(pdev, PCI_D3cold, 0);
6759
6760 igb_reset(adapter);
6761 wr32(E1000_WUS, ~0);
6762 result = PCI_ERS_RESULT_RECOVERED;
6763 }
6764
6765 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6766 if (err) {
6767 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6768 "failed 0x%0x\n", err);
6769 /* non-fatal, continue */
6770 }
6771
6772 return result;
6773 }
6774
6775 /**
6776 * igb_io_resume - called when traffic can start flowing again.
6777 * @pdev: Pointer to PCI device
6778 *
6779 * This callback is called when the error recovery driver tells us that
6780 * its OK to resume normal operation. Implementation resembles the
6781 * second-half of the igb_resume routine.
6782 */
6783 static void igb_io_resume(struct pci_dev *pdev)
6784 {
6785 struct net_device *netdev = pci_get_drvdata(pdev);
6786 struct igb_adapter *adapter = netdev_priv(netdev);
6787
6788 if (netif_running(netdev)) {
6789 if (igb_up(adapter)) {
6790 dev_err(&pdev->dev, "igb_up failed after reset\n");
6791 return;
6792 }
6793 }
6794
6795 netif_device_attach(netdev);
6796
6797 /* let the f/w know that the h/w is now under the control of the
6798 * driver. */
6799 igb_get_hw_control(adapter);
6800 }
6801
6802 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6803 u8 qsel)
6804 {
6805 u32 rar_low, rar_high;
6806 struct e1000_hw *hw = &adapter->hw;
6807
6808 /* HW expects these in little endian so we reverse the byte order
6809 * from network order (big endian) to little endian
6810 */
6811 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6812 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6813 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6814
6815 /* Indicate to hardware the Address is Valid. */
6816 rar_high |= E1000_RAH_AV;
6817
6818 if (hw->mac.type == e1000_82575)
6819 rar_high |= E1000_RAH_POOL_1 * qsel;
6820 else
6821 rar_high |= E1000_RAH_POOL_1 << qsel;
6822
6823 wr32(E1000_RAL(index), rar_low);
6824 wrfl();
6825 wr32(E1000_RAH(index), rar_high);
6826 wrfl();
6827 }
6828
6829 static int igb_set_vf_mac(struct igb_adapter *adapter,
6830 int vf, unsigned char *mac_addr)
6831 {
6832 struct e1000_hw *hw = &adapter->hw;
6833 /* VF MAC addresses start at the end of the receive addresses and move
6834 * towards the first; as a result a collision should not be possible */
6835 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6836
6837 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6838
6839 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6840
6841 return 0;
6842 }
6843
6844 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6845 {
6846 struct igb_adapter *adapter = netdev_priv(netdev);
6847 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6848 return -EINVAL;
6849 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6850 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6851 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6852 " change effective.");
6853 if (test_bit(__IGB_DOWN, &adapter->state)) {
6854 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6855 " but the PF device is not up.\n");
6856 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6857 " attempting to use the VF device.\n");
6858 }
6859 return igb_set_vf_mac(adapter, vf, mac);
6860 }
6861
6862 static int igb_link_mbps(int internal_link_speed)
6863 {
6864 switch (internal_link_speed) {
6865 case SPEED_100:
6866 return 100;
6867 case SPEED_1000:
6868 return 1000;
6869 default:
6870 return 0;
6871 }
6872 }
6873
6874 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6875 int link_speed)
6876 {
6877 int rf_dec, rf_int;
6878 u32 bcnrc_val;
6879
6880 if (tx_rate != 0) {
6881 /* Calculate the rate factor values to set */
6882 rf_int = link_speed / tx_rate;
6883 rf_dec = (link_speed - (rf_int * tx_rate));
6884 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
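/* The RTTBCNRC rate factor is link_speed / tx_rate in fixed point:
 * rf_int holds the integer part and rf_dec the remainder scaled into
 * the RF_DEC field.  For example (illustrative values only), a
 * link_speed of 1000 with tx_rate 300 gives rf_int = 3 and rf_dec
 * carrying the remaining 100/300.
 */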
6885
6886 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6887 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6888 E1000_RTTBCNRC_RF_INT_MASK);
6889 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6890 } else {
6891 bcnrc_val = 0;
6892 }
6893
6894 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6895 wr32(E1000_RTTBCNRC, bcnrc_val);
6896 }
6897
6898 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6899 {
6900 int actual_link_speed, i;
6901 bool reset_rate = false;
6902
6903 /* VF TX rate limit was not set or not supported */
6904 if ((adapter->vf_rate_link_speed == 0) ||
6905 (adapter->hw.mac.type != e1000_82576))
6906 return;
6907
6908 actual_link_speed = igb_link_mbps(adapter->link_speed);
6909 if (actual_link_speed != adapter->vf_rate_link_speed) {
6910 reset_rate = true;
6911 adapter->vf_rate_link_speed = 0;
6912 dev_info(&adapter->pdev->dev,
6913 "Link speed has been changed. VF Transmit "
6914 "rate is disabled\n");
6915 }
6916
6917 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6918 if (reset_rate)
6919 adapter->vf_data[i].tx_rate = 0;
6920
6921 igb_set_vf_rate_limit(&adapter->hw, i,
6922 adapter->vf_data[i].tx_rate,
6923 actual_link_speed);
6924 }
6925 }
6926
6927 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6928 {
6929 struct igb_adapter *adapter = netdev_priv(netdev);
6930 struct e1000_hw *hw = &adapter->hw;
6931 int actual_link_speed;
6932
6933 if (hw->mac.type != e1000_82576)
6934 return -EOPNOTSUPP;
6935
6936 actual_link_speed = igb_link_mbps(adapter->link_speed);
6937 if ((vf >= adapter->vfs_allocated_count) ||
6938 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6939 (tx_rate < 0) || (tx_rate > actual_link_speed))
6940 return -EINVAL;
6941
6942 adapter->vf_rate_link_speed = actual_link_speed;
6943 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6944 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6945
6946 return 0;
6947 }
6948
6949 static int igb_ndo_get_vf_config(struct net_device *netdev,
6950 int vf, struct ifla_vf_info *ivi)
6951 {
6952 struct igb_adapter *adapter = netdev_priv(netdev);
6953 if (vf >= adapter->vfs_allocated_count)
6954 return -EINVAL;
6955 ivi->vf = vf;
6956 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6957 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6958 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6959 ivi->qos = adapter->vf_data[vf].pf_qos;
6960 return 0;
6961 }
6962
6963 static void igb_vmm_control(struct igb_adapter *adapter)
6964 {
6965 struct e1000_hw *hw = &adapter->hw;
6966 u32 reg;
6967
6968 switch (hw->mac.type) {
6969 case e1000_82575:
6970 default:
6971 /* replication is not supported for 82575 */
6972 return;
6973 case e1000_82576:
6974 /* notify HW that the MAC is adding vlan tags */
6975 reg = rd32(E1000_DTXCTL);
6976 reg |= E1000_DTXCTL_VLAN_ADDED;
6977 wr32(E1000_DTXCTL, reg);
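/* fall through */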
6978 case e1000_82580:
6979 /* enable replication vlan tag stripping */
6980 reg = rd32(E1000_RPLOLR);
6981 reg |= E1000_RPLOLR_STRVLAN;
6982 wr32(E1000_RPLOLR, reg);
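/* fall through */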
6983 case e1000_i350:
6984 /* none of the above registers are supported by i350 */
6985 break;
6986 }
6987
6988 if (adapter->vfs_allocated_count) {
6989 igb_vmdq_set_loopback_pf(hw, true);
6990 igb_vmdq_set_replication_pf(hw, true);
6991 igb_vmdq_set_anti_spoofing_pf(hw, true,
6992 adapter->vfs_allocated_count);
6993 } else {
6994 igb_vmdq_set_loopback_pf(hw, false);
6995 igb_vmdq_set_replication_pf(hw, false);
6996 }
6997 }
6998
6999 /* igb_main.c */