S: Supported
F: drivers/input/misc/adxl34x.c
-AEDSP16 DRIVER
-M: Riccardo Facchetti <fizban@tin.it>
-S: Maintained
-F: sound/oss/aedsp16.c
-
AF9013 MEDIA DRIVER
M: Antti Palosaari <crope@iki.fi>
L: linux-media@vger.kernel.org
F: include/linux/altera_jtaguart.h
AMAZON ETHERNET DRIVERS
- M: Netanel Belgazal <netanel@annapurnalabs.com>
- R: Saeed Bishara <saeed@annapurnalabs.com>
- R: Zorik Machulsky <zorik@annapurnalabs.com>
+ M: Netanel Belgazal <netanel@amazon.com>
+ R: Saeed Bishara <saeedb@amazon.com>
+ R: Zorik Machulsky <zorik@amazon.com>
L: netdev@vger.kernel.org
S: Supported
F: Documentation/networking/ena.txt
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
S: Maintained
+F: Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
F: arch/arm/boot/dts/uniphier*
F: arch/arm/include/asm/hardware/cache-uniphier.h
F: arch/arm/mach-uniphier/
F: arch/arm64/boot/dts/socionext/
F: drivers/bus/uniphier-system-bus.c
F: drivers/clk/uniphier/
+F: drivers/gpio/gpio-uniphier.c
F: drivers/i2c/busses/i2c-uniphier*
F: drivers/irqchip/irq-uniphier-aidet.c
F: drivers/pinctrl/uniphier/
F: include/linux/async_tx.h
AT24 EEPROM DRIVER
-M: Wolfram Sang <wsa@the-dreams.de>
+M: Bartosz Golaszewski <brgl@bgdev.pl>
L: linux-i2c@vger.kernel.org
S: Maintained
F: drivers/misc/eeprom/at24.c
F: drivers/net/hamradio/baycom*
BCACHE (BLOCK LAYER CACHE)
+M: Michael Lyle <mlyle@lyle.org>
M: Kent Overstreet <kent.overstreet@gmail.com>
L: linux-bcache@vger.kernel.org
W: http://bcache.evilpiepirate.org
-S: Orphan
+C: irc://irc.oftc.net/bcache
+S: Maintained
F: drivers/md/bcache/
BDISP ST MEDIA DRIVER
S: Supported
F: arch/x86/net/bpf_jit*
F: Documentation/networking/filter.txt
+ F: Documentation/bpf/
F: include/linux/bpf*
F: include/linux/filter.h
F: include/uapi/linux/bpf*
F: net/sched/act_bpf.c
F: net/sched/cls_bpf.c
F: samples/bpf/
- F: tools/net/bpf*
+ F: tools/bpf/
F: tools/testing/selftests/bpf/
BROADCOM B44 10/100 ETHERNET DRIVER
F: drivers/gpio/gpio-brcmstb.c
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
+BROADCOM BRCMSTB USB2 and USB3 PHY DRIVER
+M: Al Cooper <alcooperx@gmail.com>
+L: linux-kernel@vger.kernel.org
+L: bcm-kernel-feedback-list@broadcom.com
+S: Maintained
+F: drivers/phy/broadcom/phy-brcm-usb*
+
BROADCOM GENET ETHERNET DRIVER
+ M: Doug Berger <opendmb@gmail.com>
M: Florian Fainelli <f.fainelli@gmail.com>
L: netdev@vger.kernel.org
S: Supported
CA8210 IEEE-802.15.4 RADIO DRIVER
M: Harry Morris <h.morris@cascoda.com>
- M: linuxdev@cascoda.com
L: linux-wpan@vger.kernel.org
W: https://github.com/Cascoda/ca8210-linux.git
S: Maintained
F: drivers/auxdisplay/cfag12864bfb.c
F: include/linux/cfag12864b.h
- CFG80211 and NL80211
+ 802.11 (including CFG80211/NL80211)
M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
W: http://wireless.kernel.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
S: Maintained
+ F: net/wireless/
F: include/uapi/linux/nl80211.h
+ F: include/linux/ieee80211.h
+ F: include/net/wext.h
F: include/net/cfg80211.h
- F: net/wireless/*
- X: net/wireless/wext*
+ F: include/net/iw_handler.h
+ F: include/net/ieee80211_radiotap.h
+ F: Documentation/driver-api/80211/cfg80211.rst
+ F: Documentation/networking/regulatory.txt
CHAR and MISC DRIVERS
M: Arnd Bergmann <arnd@arndb.de>
CISCO VIC ETHERNET NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
M: Govindarajulu Varadarajan <_govind@gmx.com>
- M: Neel Patel <neepatel@cisco.com>
+ M: Parvi Kaustubhi <pkaustub@cisco.com>
S: Supported
F: drivers/net/ethernet/cisco/enic/
L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
S: Supported
-F: drivers/clocksource
+F: drivers/clocksource/
+F: Documentation/devicetree/bindings/timer/
CMPC ACPI DRIVER
M: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
M: Julia Lawall <Julia.Lawall@lip6.fr>
M: Gilles Muller <Gilles.Muller@lip6.fr>
M: Nicolas Palix <nicolas.palix@imag.fr>
-M: Michal Marek <mmarek@suse.com>
+M: Michal Marek <michal.lkml@markovi.net>
L: cocci@systeme.lip6.fr (moderated for non-subscribers)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
W: http://coccinelle.lip6.fr/
CPU POWER MONITORING SUBSYSTEM
M: Thomas Renninger <trenn@suse.com>
+M: Shuah Khan <shuahkh@osg.samsung.com>
+M: Shuah Khan <shuah@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
F: tools/power/cpupower/
T: quilt http://people.redhat.com/agk/patches/linux/editing/
S: Maintained
F: Documentation/device-mapper/
+F: drivers/md/Makefile
+F: drivers/md/Kconfig
F: drivers/md/dm*
F: drivers/md/persistent-data/
F: include/linux/device-mapper.h
F: drivers/dma/
F: include/linux/dmaengine.h
F: Documentation/devicetree/bindings/dma/
-F: Documentation/dmaengine/
+F: Documentation/driver-api/dmaengine/
T: git git://git.infradead.org/users/vkoul/slave-dma.git
DMA MAPPING HELPERS
S: Maintained
F: drivers/edac/highbank*
-EDAC-CAVIUM
+EDAC-CAVIUM OCTEON
M: Ralf Baechle <ralf@linux-mips.org>
M: David Daney <david.daney@cavium.com>
L: linux-edac@vger.kernel.org
L: linux-mips@linux-mips.org
S: Supported
F: drivers/edac/octeon_edac*
+
+EDAC-CAVIUM THUNDERX
+M: David Daney <david.daney@cavium.com>
+M: Jan Glauber <jglauber@cavium.com>
+L: linux-edac@vger.kernel.org
+S: Supported
F: drivers/edac/thunderx_edac*
EDAC-CORE
Extended Verification Module (EVM)
M: Mimi Zohar <zohar@linux.vnet.ibm.com>
-L: linux-ima-devel@lists.sourceforge.net
-L: linux-security-module@vger.kernel.org
+L: linux-integrity@vger.kernel.org
S: Supported
F: security/integrity/evm/
FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
M: Horia Geantă <horia.geanta@nxp.com>
-M: Dan Douglass <dan.douglass@nxp.com>
+M: Aymen Sghaier <aymen.sghaier@nxp.com>
L: linux-crypto@vger.kernel.org
S: Maintained
F: drivers/crypto/caam/
S: Supported
F: fs/crypto/
F: include/linux/fscrypt*.h
+F: Documentation/filesystems/fscrypt.rst
FUJITSU FR-V (FRV) PORT
S: Orphan
F: drivers/net/ethernet/hisilicon/
F: Documentation/devicetree/bindings/net/hisilicon*.txt
+HISILICON PMU DRIVER
+M: Shaokun Zhang <zhangshaokun@hisilicon.com>
+W: http://www.hisilicon.com
+S: Supported
+F: drivers/perf/hisilicon
+F: Documentation/perf/hisi-pmu.txt
+
HISILICON ROCE DRIVER
M: Lijun Ou <oulijun@huawei.com>
M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
M: Mimi Zohar <zohar@linux.vnet.ibm.com>
M: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
-L: linux-ima-devel@lists.sourceforge.net
-L: linux-ima-user@lists.sourceforge.net
-L: linux-security-module@vger.kernel.org
+L: linux-integrity@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
S: Supported
F: security/integrity/ima/
F: scripts/Makefile.kasan
KCONFIG
-M: "Yann E. MORIN" <yann.morin.1998@free.fr>
L: linux-kbuild@vger.kernel.org
-T: git git://gitorious.org/linux-kconfig/linux-kconfig
-S: Maintained
+S: Orphan
F: Documentation/kbuild/kconfig-language.txt
F: scripts/kconfig/
KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
M: Masahiro Yamada <yamada.masahiro@socionext.com>
-M: Michal Marek <mmarek@suse.com>
+M: Michal Marek <michal.lkml@markovi.net>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
L: linux-kbuild@vger.kernel.org
S: Maintained
KEYS-ENCRYPTED
M: Mimi Zohar <zohar@linux.vnet.ibm.com>
-M: David Safford <safford@us.ibm.com>
-L: linux-security-module@vger.kernel.org
+L: linux-integrity@vger.kernel.org
L: keyrings@vger.kernel.org
S: Supported
F: Documentation/security/keys/trusted-encrypted.rst
F: security/keys/encrypted-keys/
KEYS-TRUSTED
-M: David Safford <safford@us.ibm.com>
M: Mimi Zohar <zohar@linux.vnet.ibm.com>
-L: linux-security-module@vger.kernel.org
+L: linux-integrity@vger.kernel.org
L: keyrings@vger.kernel.org
S: Supported
F: Documentation/security/keys/trusted-encrypted.rst
F: include/net/mac80211.h
F: net/mac80211/
F: drivers/net/wireless/mac80211_hwsim.[ch]
+ F: Documentation/networking/mac80211_hwsim/README
MAILBOX API
M: Jassi Brar <jassisinghbrar@gmail.com>
F: include/linux/mux/
F: drivers/mux/
-MULTISOUND SOUND DRIVER
-M: Andrew Veliath <andrewtv@usa.net>
-S: Maintained
-F: Documentation/sound/oss/MultiSound
-F: sound/oss/msnd*
-
MULTITECH MULTIPORT CARD (ISICOM)
S: Orphan
F: drivers/tty/isicom.c
S: Maintained
F: net/dsa/
F: include/net/dsa.h
+ F: include/linux/dsa/
F: drivers/net/dsa/
NETWORKING [GENERAL]
F: include/uapi/linux/net.h
F: include/uapi/linux/netdevice.h
F: include/uapi/linux/net_namespace.h
- F: tools/net/
F: tools/testing/selftests/net/
+ F: lib/net_utils.c
F: lib/random32.c
NETWORKING [IPSEC]
L: openrisc@lists.librecores.org
W: http://openrisc.io
S: Maintained
+F: Documentation/devicetree/bindings/openrisc/
+F: Documentation/openrisc/
F: arch/openrisc/
+F: drivers/irqchip/irq-ompic.c
+F: drivers/irqchip/irq-or1k-*
OPENVSWITCH
M: Pravin Shelar <pshelar@nicira.com>
L: linux-pm@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git
-F: drivers/base/power/opp/
+F: drivers/opp/
F: include/linux/pm_opp.h
F: Documentation/power/opp.txt
F: Documentation/devicetree/bindings/opp/
QAT DRIVER
M: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
-M: Salvatore Benedetto <salvatore.benedetto@intel.com>
L: qat-linux@intel.com
S: Supported
F: drivers/crypto/qat/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
S: Maintained
F: Documentation/rfkill.txt
+ F: Documentation/ABI/stable/sysfs-class-rfkill
F: net/rfkill/
RHASHTABLE
F: drivers/mtd/nand/r852.c
F: drivers/mtd/nand/r852.h
+RISC-V ARCHITECTURE
+M: Palmer Dabbelt <palmer@sifive.com>
+M: Albert Ou <albert@sifive.com>
+L: patches@groups.riscv.org
+T: git https://github.com/riscv/riscv-linux
+S: Supported
+F: arch/riscv/
+K: riscv
+N: riscv
+
ROCCAT DRIVERS
M: Stefan Achatz <erazor_de@users.sourceforge.net>
W: http://sourceforge.net/projects/roccat/
L: linux-samsung-soc@vger.kernel.org
S: Maintained
F: drivers/crypto/exynos-rng.c
-F: Documentation/devicetree/bindings/rng/samsung,exynos-rng4.txt
+F: Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
SAMSUNG FRAMEBUFFER DRIVER
M: Jingoo Han <jingoohan1@gmail.com>
S: Maintained
F: drivers/mmc/host/sdhci-spear.c
+SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) TI OMAP DRIVER
+M: Kishon Vijay Abraham I <kishon@ti.com>
+L: linux-mmc@vger.kernel.org
+S: Maintained
+F: drivers/mmc/host/sdhci-omap.c
+
SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
M: Scott Bauer <scott.bauer@intel.com>
M: Jonathan Derrick <jonathan.derrick@intel.com>
-M: Rafael Antognolli <rafael.antognolli@intel.com>
L: linux-block@vger.kernel.org
S: Supported
F: block/sed*
L: linux-raid@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
S: Supported
-F: drivers/md/
+F: drivers/md/Makefile
+F: drivers/md/Kconfig
+F: drivers/md/md*
+F: drivers/md/raid*
F: include/linux/raid/
F: include/uapi/linux/raid/
F: arch/arc/boot/dts/ax*
F: Documentation/devicetree/bindings/arc/axs10*
+SYNOPSYS DESIGNWARE APB GPIO DRIVER
+M: Hoan Tran <hotran@apm.com>
+L: linux-gpio@vger.kernel.org
+S: Maintained
+F: drivers/gpio/gpio-dwapb.c
+F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
+
SYNOPSYS DESIGNWARE DMAC DRIVER
M: Viresh Kumar <vireshk@kernel.org>
-M: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
S: Maintained
F: include/linux/dma/dw.h
F: include/linux/platform_data/dma-dw.h
M: Yehezkel Bernat <yehezkel.bernat@intel.com>
S: Maintained
F: drivers/thunderbolt/
+ F: include/linux/thunderbolt.h
+
+ THUNDERBOLT NETWORK DRIVER
+ M: Michael Jamet <michael.jamet@intel.com>
+ M: Mika Westerberg <mika.westerberg@linux.intel.com>
+ M: Yehezkel Bernat <yehezkel.bernat@intel.com>
+ L: netdev@vger.kernel.org
+ S: Maintained
+ F: drivers/net/thunderbolt.c
THUNDERX GPIO DRIVER
M: David Daney <david.daney@cavium.com>
M: Bastien Nocera <hadess@hadess.net>
L: linux-input@vger.kernel.org
S: Maintained
-F: drivers/hid/hid-udraw.c
+F: drivers/hid/hid-udraw-ps3.c
UFS FILESYSTEM
M: Evgeniy Dushistov <dushistov@mail.ru>
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: include/uapi/linux/vsockmon.h
+ F: include/uapi/linux/vm_sockets_diag.h
+ F: net/vmw_vsock/diag.c
F: net/vmw_vsock/af_vsock_tap.c
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
F: drivers/net/vsockmon.c
F: drivers/vhost/vsock.c
F: drivers/vhost/vsock.h
+ F: tools/testing/vsock/
VIRTIO CONSOLE DRIVER
M: Amit Shah <amit@kernel.org>
L: kvm@vger.kernel.org
S: Supported
F: drivers/s390/virtio/
+F: arch/s390/include/uapi/asm/virtio-ccw.h
VIRTIO GPU DRIVER
M: David Airlie <airlied@linux.ie>
S: Supported
W: http://wireless.kernel.org/en/users/Drivers/wil6210
F: drivers/net/wireless/ath/wil6210/
- F: include/uapi/linux/wil6210_uapi.h
WIMAX STACK
M: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
F: Documentation/devicetree/bindings/regulator/arizona-regulator.txt
F: Documentation/devicetree/bindings/mfd/arizona.txt
F: Documentation/devicetree/bindings/mfd/wm831x.txt
+F: Documentation/devicetree/bindings/sound/wlf,arizona.txt
F: arch/arm/mach-s3c64xx/mach-crag6410*
F: drivers/clk/clk-wm83*.c
F: drivers/extcon/extcon-arizona.c
static void idt77105_restart_timer_func(unsigned long);
-static DEFINE_TIMER(stats_timer, idt77105_stats_timer_func, 0, 0);
-static DEFINE_TIMER(restart_timer, idt77105_restart_timer_func, 0, 0);
+static DEFINE_TIMER(stats_timer, idt77105_stats_timer_func);
+static DEFINE_TIMER(restart_timer, idt77105_restart_timer_func);
static int start_timer = 1;
static struct idt77105_priv *idt77105_all = NULL;
if (start_timer) {
start_timer = 0;
- setup_timer(&stats_timer, idt77105_stats_timer_func, 0UL);
stats_timer.expires = jiffies+IDT77105_STATS_TIMER_PERIOD;
add_timer(&stats_timer);
- setup_timer(&restart_timer, idt77105_restart_timer_func, 0UL);
restart_timer.expires = jiffies+IDT77105_RESTART_TIMER_PERIOD;
add_timer(&restart_timer);
}
static struct atm_dev *_ia_dev[8];
static int iadev_count;
static void ia_led_timer(unsigned long arg);
-static DEFINE_TIMER(ia_timer, ia_led_timer, 0, 0);
+static DEFINE_TIMER(ia_timer, ia_led_timer);
static int IA_TX_BUF = DFL_TX_BUFFERS, IA_TX_BUF_SZ = DFL_TX_BUF_SZ;
static int IA_RX_BUF = DFL_RX_BUFFERS, IA_RX_BUF_SZ = DFL_RX_BUF_SZ;
static uint IADebugFlag = /* IF_IADBG_ERR | IF_IADBG_CBR| IF_IADBG_INIT_ADAPTER
static void ia_suni_pm7345_init_ds3(struct iadev_priv *iadev)
{
- static const struct ia_reg suni_ds3_init [] = {
+ static const struct ia_reg suni_ds3_init[] = {
{ SUNI_DS3_FRM_INTR_ENBL, 0x17 },
{ SUNI_DS3_FRM_CFG, 0x01 },
{ SUNI_DS3_TRAN_CFG, 0x01 },
static void ia_suni_pm7345_init_e3(struct iadev_priv *iadev)
{
- static const struct ia_reg suni_e3_init [] = {
+ static const struct ia_reg suni_e3_init[] = {
{ SUNI_E3_FRM_FRAM_OPTIONS, 0x04 },
{ SUNI_E3_FRM_MAINT_OPTIONS, 0x20 },
{ SUNI_E3_FRM_FRAM_INTR_ENBL, 0x1d },
static void ia_suni_pm7345_init(struct iadev_priv *iadev)
{
- static const struct ia_reg suni_init [] = {
+ static const struct ia_reg suni_init[] = {
/* Enable RSOP loss of signal interrupt. */
{ SUNI_INTR_ENBL, 0x28 },
/* Clear error counters. */
slave = bond_slave_get_rcu(skb->dev);
bond = slave->bond;
- recv_probe = ACCESS_ONCE(bond->recv_probe);
+ recv_probe = READ_ONCE(bond->recv_probe);
if (recv_probe) {
ret = recv_probe(skb, bond, slave);
if (ret == RX_HANDLER_CONSUMED) {
}
}
- static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave)
+ static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
- int err;
lag_upper_info.tx_type = bond_lag_tx_type(bond);
- err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
- &lag_upper_info);
- if (err)
- return err;
- rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
- return 0;
+
+ return netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
+ &lag_upper_info, extack);
}
static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
{
netdev_upper_dev_unlink(slave->dev, bond->dev);
slave->dev->flags &= ~IFF_SLAVE;
- rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
}
static struct slave *bond_alloc_slave(struct bonding *bond)
}
/* enslave device <slave> to bond device <master> */
- int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct bonding *bond = netdev_priv(bond_dev);
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
/* already in-use? */
if (netdev_is_rx_handler_busy(slave_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved");
netdev_err(bond_dev,
"Error: Device is in use and cannot be enslaved\n");
return -EBUSY;
}
if (bond_dev == slave_dev) {
+ NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself.");
netdev_err(bond_dev, "cannot enslave bond to itself.\n");
return -EPERM;
}
netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n",
slave_dev->name);
if (vlan_uses_dev(bond_dev)) {
+ NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond");
netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",
slave_dev->name, bond_dev->name);
return -EPERM;
* enslaving it; the old ifenslave will not.
*/
if (slave_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device can not be enslaved while up");
netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n",
slave_dev->name);
return -EPERM;
bond_dev);
}
} else if (bond_dev->type != slave_dev->type) {
+ NL_SET_ERR_MSG(extack, "Device type is different from other slaves");
netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n",
slave_dev->name, slave_dev->type, bond_dev->type);
return -EINVAL;
if (slave_dev->type == ARPHRD_INFINIBAND &&
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+ NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves");
netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n",
slave_dev->type);
res = -EOPNOTSUPP;
bond->params.fail_over_mac = BOND_FOM_ACTIVE;
netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n");
} else {
+ NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
res = -EOPNOTSUPP;
goto err_undo_flags;
goto err_detach;
}
- res = bond_master_upper_dev_link(bond, new_slave);
+ res = bond_master_upper_dev_link(bond, new_slave, extack);
if (res) {
netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res);
goto err_unregister;
struct slave *curr_active_slave, *curr_arp_slave;
unsigned char *arp_ptr;
__be32 sip, tip;
- int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+ int is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+ unsigned int alen;
if (!slave_do_arp_validate(bond, slave)) {
if ((slave_do_arp_validate_only(bond) && is_arp) ||
break;
case NETDEV_UP:
case NETDEV_CHANGE:
- bond_update_speed_duplex(slave);
+ /* For 802.3ad mode only:
+ * Getting invalid Speed/Duplex values here will put slave
+ * in weird state. So mark it as link-down for the time
+ * being and let link-monitoring (miimon) set it right when
+ * correct speeds/duplex are available.
+ */
+ if (bond_update_speed_duplex(slave) &&
+ BOND_MODE(bond) == BOND_MODE_8023AD)
+ slave->link = BOND_LINK_DOWN;
+
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_adapter_speed_duplex_changed(slave);
/* Fallthrough */
switch (cmd) {
case BOND_ENSLAVE_OLD:
case SIOCBONDENSLAVE:
- res = bond_enslave(bond_dev, slave_dev);
+ res = bond_enslave(bond_dev, slave_dev, NULL);
break;
case BOND_RELEASE_OLD:
case SIOCBONDRELEASE:
else
bond_xmit_slave_id(bond, skb, 0);
} else {
- int slave_cnt = ACCESS_ONCE(bond->slave_cnt);
+ int slave_cnt = READ_ONCE(bond->slave_cnt);
if (likely(slave_cnt)) {
slave_id = bond_rr_gen_slave_id(bond);
unsigned int count;
slaves = rcu_dereference(bond->slave_arr);
- count = slaves ? ACCESS_ONCE(slaves->count) : 0;
+ count = slaves ? READ_ONCE(slaves->count) : 0;
if (likely(count)) {
slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
bond_dev_queue_xmit(bond, skb, slave->dev);
*/
static inline int reclaimable(const struct sge_txq *q)
{
- int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
+ int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
hw_cidx -= q->cidx;
return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
}
*/
static inline void reclaim_completed_tx_imm(struct sge_txq *q)
{
- int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
+ int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
int reclaim = hw_cidx - q->cidx;
if (reclaim < 0)
*/
static inline int is_ofld_imm(const struct sk_buff *skb)
{
- return skb->len <= MAX_IMM_TX_PKT_LEN;
+ struct work_request_hdr *req = (struct work_request_hdr *)skb->data;
+ unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));
+
+ if (opcode == FW_CRYPTO_LOOKASIDE_WR)
+ return skb->len <= SGE_MAX_WR_LEN;
+ else
+ return skb->len <= MAX_IMM_TX_PKT_LEN;
}
/**
return t4_intr_intx;
}
- static void sge_rx_timer_cb(unsigned long data)
+ static void sge_rx_timer_cb(struct timer_list *t)
{
unsigned long m;
unsigned int i;
- struct adapter *adap = (struct adapter *)data;
+ struct adapter *adap = from_timer(adap, t, sge.rx_timer);
struct sge *s = &adap->sge;
for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
}
- static void sge_tx_timer_cb(unsigned long data)
+ static void sge_tx_timer_cb(struct timer_list *t)
{
unsigned long m;
unsigned int i, budget;
- struct adapter *adap = (struct adapter *)data;
+ struct adapter *adap = from_timer(adap, t, sge.tx_timer);
struct sge *s = &adap->sge;
for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
/* Set up timers used for recuring callbacks to process RX and TX
* administrative tasks.
*/
- setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
- setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
+ timer_setup(&s->rx_timer, sge_rx_timer_cb, 0);
+ timer_setup(&s->tx_timer, sge_tx_timer_cb, 0);
spin_lock_init(&s->intrq_lock);
vsi->rx_buf_failed, vsi->rx_page_failed);
rcu_read_lock();
for (i = 0; i < vsi->num_queue_pairs; i++) {
- struct i40e_ring *rx_ring = ACCESS_ONCE(vsi->rx_rings[i]);
+ struct i40e_ring *rx_ring = READ_ONCE(vsi->rx_rings[i]);
if (!rx_ring)
continue;
rx_ring->netdev,
rx_ring->rx_bi);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
- i, rx_ring->state,
+ " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+ i, *rx_ring->state,
rx_ring->queue_index,
rx_ring->reg_idx);
dev_info(&pf->pdev->dev,
ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
}
for (i = 0; i < vsi->num_queue_pairs; i++) {
- struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
+ struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
if (!tx_ring)
continue;
tx_ring->netdev,
tx_ring->tx_bi);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
- i, tx_ring->state,
+ " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+ i, *tx_ring->state,
tx_ring->queue_index,
tx_ring->reg_idx);
dev_info(&pf->pdev->dev,
*/
if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
- i40e_do_reset_safe(pf,
- BIT_ULL(__I40E_PF_RESET_REQUESTED));
+ i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
}
vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
+ I40E_PRIV_FLAG("disable-source-pruning",
+ I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
};
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
/**
* i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes
- * @phy_types: PHY types to convert
- * @supported: pointer to the ethtool supported variable to fill in
- * @advertising: pointer to the ethtool advertising variable to fill in
+ * @pf: PF struct with phy_types
+ * @ks: ethtool link ksettings struct to fill out
*
**/
- static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
- u32 *advertising)
+ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
+ struct ethtool_link_ksettings *ks)
{
struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info;
u64 phy_types = pf->hw.phy.phy_types;
- *supported = 0x0;
- *advertising = 0x0;
+ ethtool_link_ksettings_zero_link_mode(ks, supported);
+ ethtool_link_ksettings_zero_link_mode(ks, advertising);
if (phy_types & I40E_CAP_PHY_TYPE_SGMII) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- *advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
- *supported |= SUPPORTED_100baseT_Full;
- *advertising |= ADVERTISED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 100baseT_Full);
}
}
if (phy_types & I40E_CAP_PHY_TYPE_XAUI ||
phy_types & I40E_CAP_PHY_TYPE_XFI ||
phy_types & I40E_CAP_PHY_TYPE_SFI ||
phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC)
- *supported |= SUPPORTED_10000baseT_Full;
- if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_10000baseT_Full;
- *advertising |= ADVERTISED_Autoneg;
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_T) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- *advertising |= ADVERTISED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
- *supported |= SUPPORTED_40000baseCR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_40000baseCR4_Full;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB)
- *advertising |= ADVERTISED_40000baseCR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseCR4_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_100baseT_Full;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
- *advertising |= ADVERTISED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 100baseT_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
- phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
- phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
- phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full;
- *advertising |= ADVERTISED_Autoneg;
+ if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- *advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
- *supported |= SUPPORTED_40000baseSR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseSR4_Full);
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
- *supported |= SUPPORTED_40000baseLR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseLR4_Full);
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
- *supported |= SUPPORTED_40000baseKR4_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_40000baseKR4_Full |
- ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseLR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseLR4_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
- *supported |= SUPPORTED_20000baseKR2_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 20000baseKR2_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_20GB)
- *advertising |= ADVERTISED_20000baseKR2_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 20000baseKR2_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
- if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
- *supported |= SUPPORTED_10000baseKR_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKX4_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
- *advertising |= ADVERTISED_10000baseKR_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseKX4_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
- *supported |= SUPPORTED_10000baseKX4_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR &&
+ !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKR_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- *advertising |= ADVERTISED_10000baseKX4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseKR_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
- if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
- *supported |= SUPPORTED_1000baseKX_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX &&
+ !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseKX_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
- *advertising |= ADVERTISED_1000baseKX_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseKX_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
- phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
- phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+ /* need to add 25G PHY types */
+ if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseKR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseKR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseCR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) {
- *supported |= SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseSR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseSR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseCR_Full);
+ }
+ /* need to add new 10G PHY types */
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseCR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseCR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseSR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseSR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseLR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseLR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseX_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseX_Full);
+ }
+ /* Autoneg PHY types */
+ if (phy_types & I40E_CAP_PHY_TYPE_SGMII ||
+ phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4 ||
+ phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
+ phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4 ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+ phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX ||
+ phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Autoneg);
}
}
/**
* i40e_get_settings_link_up - Get the Link settings for when link is up
* @hw: hw structure
- * @ecmd: ethtool command to fill in
+ * @ks: ethtool ksettings to fill in
* @netdev: network interface device structure
- *
+ * @pf: pointer to physical function struct
**/
static void i40e_get_settings_link_up(struct i40e_hw *hw,
- struct ethtool_link_ksettings *cmd,
+ struct ethtool_link_ksettings *ks,
struct net_device *netdev,
struct i40e_pf *pf)
{
struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+ struct ethtool_link_ksettings cap_ksettings;
u32 link_speed = hw_link_info->link_speed;
- u32 e_advertising = 0x0;
- u32 e_supported = 0x0;
- u32 supported, advertising;
-
- ethtool_convert_link_mode_to_legacy_u32(&supported,
- cmd->link_modes.supported);
- ethtool_convert_link_mode_to_legacy_u32(&advertising,
- cmd->link_modes.advertising);
/* Initialize supported and advertised settings based on phy settings */
switch (hw_link_info->phy_type) {
case I40E_PHY_TYPE_40GBASE_CR4:
case I40E_PHY_TYPE_40GBASE_CR4_CU:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_40000baseCR4_Full;
- advertising = ADVERTISED_Autoneg |
- ADVERTISED_40000baseCR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseCR4_Full);
break;
case I40E_PHY_TYPE_XLAUI:
case I40E_PHY_TYPE_XLPPI:
case I40E_PHY_TYPE_40GBASE_AOC:
- supported = SUPPORTED_40000baseCR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
break;
case I40E_PHY_TYPE_40GBASE_SR4:
- supported = SUPPORTED_40000baseSR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseSR4_Full);
break;
case I40E_PHY_TYPE_40GBASE_LR4:
- supported = SUPPORTED_40000baseLR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseLR4_Full);
break;
+ case I40E_PHY_TYPE_25GBASE_SR:
+ case I40E_PHY_TYPE_25GBASE_LR:
case I40E_PHY_TYPE_10GBASE_SR:
case I40E_PHY_TYPE_10GBASE_LR:
case I40E_PHY_TYPE_1000BASE_SX:
case I40E_PHY_TYPE_1000BASE_LX:
- supported = SUPPORTED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseSR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseSR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseSR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseSR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseLR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseLR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseX_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseX_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
if (hw_link_info->module_type[2] &
I40E_MODULE_TYPE_1000BASE_SX ||
hw_link_info->module_type[2] &
I40E_MODULE_TYPE_1000BASE_LX) {
- supported |= SUPPORTED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds &
I40E_LINK_SPEED_1GB)
- advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(
+ ks, advertising, 1000baseT_Full);
}
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- advertising |= ADVERTISED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
break;
case I40E_PHY_TYPE_10GBASE_T:
case I40E_PHY_TYPE_1000BASE_T:
case I40E_PHY_TYPE_100BASE_TX:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_10000baseT_Full |
- SUPPORTED_1000baseT_Full |
- SUPPORTED_100baseT_Full;
- advertising = ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- advertising |= ADVERTISED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
- advertising |= ADVERTISED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 100baseT_Full);
break;
case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full;
- advertising = ADVERTISED_Autoneg |
- ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
break;
case I40E_PHY_TYPE_10GBASE_CR1_CU:
case I40E_PHY_TYPE_10GBASE_CR1:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_10000baseT_Full;
- advertising = ADVERTISED_Autoneg |
- ADVERTISED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
break;
case I40E_PHY_TYPE_XAUI:
case I40E_PHY_TYPE_XFI:
case I40E_PHY_TYPE_SFI:
case I40E_PHY_TYPE_10GBASE_SFPP_CU:
case I40E_PHY_TYPE_10GBASE_AOC:
- supported = SUPPORTED_10000baseT_Full;
- advertising = SUPPORTED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
break;
case I40E_PHY_TYPE_SGMII:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
- supported |= SUPPORTED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
if (hw_link_info->requested_speeds &
I40E_LINK_SPEED_100MB)
- advertising |= ADVERTISED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(
+ ks, advertising, 100baseT_Full);
}
break;
case I40E_PHY_TYPE_40GBASE_KR4:
+ case I40E_PHY_TYPE_25GBASE_KR:
case I40E_PHY_TYPE_20GBASE_KR2:
case I40E_PHY_TYPE_10GBASE_KR:
case I40E_PHY_TYPE_10GBASE_KX4:
case I40E_PHY_TYPE_1000BASE_KX:
- supported |= SUPPORTED_40000baseKR4_Full |
- SUPPORTED_20000baseKR2_Full |
- SUPPORTED_10000baseKR_Full |
- SUPPORTED_10000baseKX4_Full |
- SUPPORTED_1000baseKX_Full |
- SUPPORTED_Autoneg;
- advertising |= ADVERTISED_40000baseKR4_Full |
- ADVERTISED_20000baseKR2_Full |
- ADVERTISED_10000baseKR_Full |
- ADVERTISED_10000baseKX4_Full |
- ADVERTISED_1000baseKX_Full |
- ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseKR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseKR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 20000baseKR2_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKX4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseKX_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseKR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseKR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 20000baseKR2_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseKR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseKX4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseKX_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
break;
- case I40E_PHY_TYPE_25GBASE_KR:
case I40E_PHY_TYPE_25GBASE_CR:
- case I40E_PHY_TYPE_25GBASE_SR:
- case I40E_PHY_TYPE_25GBASE_LR:
- supported = SUPPORTED_Autoneg;
- advertising = ADVERTISED_Autoneg;
- /* TODO: add speeds when ethtool is ready to support*/
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseCR_Full);
+ break;
+ case I40E_PHY_TYPE_25GBASE_AOC:
+ case I40E_PHY_TYPE_25GBASE_ACC:
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseCR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseCR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseCR_Full);
break;
default:
/* if we got here and link is up something bad is afoot */
- netdev_info(netdev, "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+ netdev_info(netdev,
+ "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
hw_link_info->phy_type);
}
/* Now that we've worked out everything that could be supported by the
- * current PHY type, get what is supported by the NVM and them to
- * get what is truly supported
+ * current PHY type, get what is supported by the NVM and intersect
+ * them to get what is truly supported
*/
- i40e_phy_type_to_ethtool(pf, &e_supported,
- &e_advertising);
-
- supported = supported & e_supported;
- advertising = advertising & e_advertising;
+ memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings));
+ i40e_phy_type_to_ethtool(pf, &cap_ksettings);
+ ethtool_intersect_link_masks(ks, &cap_ksettings);
/* Set speed and duplex */
switch (link_speed) {
case I40E_LINK_SPEED_40GB:
- cmd->base.speed = SPEED_40000;
+ ks->base.speed = SPEED_40000;
break;
case I40E_LINK_SPEED_25GB:
- #ifdef SPEED_25000
- cmd->base.speed = SPEED_25000;
- #else
- netdev_info(netdev,
- "Speed is 25G, display not supported by this version of ethtool.\n");
- #endif
+ ks->base.speed = SPEED_25000;
break;
case I40E_LINK_SPEED_20GB:
- cmd->base.speed = SPEED_20000;
+ ks->base.speed = SPEED_20000;
break;
case I40E_LINK_SPEED_10GB:
- cmd->base.speed = SPEED_10000;
+ ks->base.speed = SPEED_10000;
break;
case I40E_LINK_SPEED_1GB:
- cmd->base.speed = SPEED_1000;
+ ks->base.speed = SPEED_1000;
break;
case I40E_LINK_SPEED_100MB:
- cmd->base.speed = SPEED_100;
+ ks->base.speed = SPEED_100;
break;
default:
break;
}
- cmd->base.duplex = DUPLEX_FULL;
-
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
- supported);
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
- advertising);
+ ks->base.duplex = DUPLEX_FULL;
}
/**
* i40e_get_settings_link_down - Get the Link settings for when link is down
* @hw: hw structure
- * @ecmd: ethtool command to fill in
+ * @ks: ethtool ksettings to fill in
+ * @pf: pointer to physical function struct
*
* Reports link settings that can be determined when link is down
**/
static void i40e_get_settings_link_down(struct i40e_hw *hw,
- struct ethtool_link_ksettings *cmd,
+ struct ethtool_link_ksettings *ks,
struct i40e_pf *pf)
{
- u32 supported, advertising;
-
/* link is down and the driver needs to fall back on
* supported phy types to figure out what info to display
*/
- i40e_phy_type_to_ethtool(pf, &supported, &advertising);
-
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
- supported);
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
- advertising);
+ i40e_phy_type_to_ethtool(pf, ks);
/* With no link speed and duplex are unknown */
- cmd->base.speed = SPEED_UNKNOWN;
- cmd->base.duplex = DUPLEX_UNKNOWN;
+ ks->base.speed = SPEED_UNKNOWN;
+ ks->base.duplex = DUPLEX_UNKNOWN;
}
/**
- * i40e_get_settings - Get Link Speed and Duplex settings
+ * i40e_get_link_ksettings - Get Link Speed and Duplex settings
* @netdev: network interface device structure
- * @ecmd: ethtool command
+ * @ks: ethtool ksettings
*
* Reports speed/duplex settings based on media_type
**/
static int i40e_get_link_ksettings(struct net_device *netdev,
- struct ethtool_link_ksettings *cmd)
+ struct ethtool_link_ksettings *ks)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_pf *pf = np->vsi->back;
struct i40e_hw *hw = &pf->hw;
struct i40e_link_status *hw_link_info = &hw->phy.link_info;
bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
- u32 advertising;
+
+ ethtool_link_ksettings_zero_link_mode(ks, supported);
+ ethtool_link_ksettings_zero_link_mode(ks, advertising);
if (link_up)
- i40e_get_settings_link_up(hw, cmd, netdev, pf);
+ i40e_get_settings_link_up(hw, ks, netdev, pf);
else
- i40e_get_settings_link_down(hw, cmd, pf);
+ i40e_get_settings_link_down(hw, ks, pf);
/* Now set the settings that don't rely on link being up/down */
/* Set autoneg settings */
- cmd->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
- AUTONEG_ENABLE : AUTONEG_DISABLE);
+ ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE);
+ /* Set media type settings */
switch (hw->phy.media_type) {
case I40E_MEDIA_TYPE_BACKPLANE:
- ethtool_link_ksettings_add_link_mode(cmd, supported,
- Autoneg);
- ethtool_link_ksettings_add_link_mode(cmd, supported,
- Backplane);
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
- Autoneg);
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
Backplane);
- cmd->base.port = PORT_NONE;
+ ks->base.port = PORT_NONE;
break;
case I40E_MEDIA_TYPE_BASET:
- ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
- ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
- cmd->base.port = PORT_TP;
+ ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+ ks->base.port = PORT_TP;
break;
case I40E_MEDIA_TYPE_DA:
case I40E_MEDIA_TYPE_CX4:
- ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
- ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
- cmd->base.port = PORT_DA;
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+ ks->base.port = PORT_DA;
break;
case I40E_MEDIA_TYPE_FIBER:
- ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
- cmd->base.port = PORT_FIBRE;
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ks->base.port = PORT_FIBRE;
break;
case I40E_MEDIA_TYPE_UNKNOWN:
default:
- cmd->base.port = PORT_OTHER;
+ ks->base.port = PORT_OTHER;
break;
}
/* Set flow control settings */
- ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+ ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
switch (hw->fc.requested_mode) {
case I40E_FC_FULL:
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
- Pause);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
break;
case I40E_FC_TX_PAUSE:
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
Asym_Pause);
break;
case I40E_FC_RX_PAUSE:
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
- Pause);
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
Asym_Pause);
break;
default:
- ethtool_convert_link_mode_to_legacy_u32(
- &advertising, cmd->link_modes.advertising);
-
- advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.advertising, advertising);
+ ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_del_link_mode(ks, advertising,
+ Asym_Pause);
break;
}
}
/**
- * i40e_set_settings - Set Speed and Duplex
+ * i40e_set_link_ksettings - Set Speed and Duplex
* @netdev: network interface device structure
- * @ecmd: ethtool command
+ * @ks: ethtool ksettings
*
* Set speed/duplex per media_types advertised/forced
**/
static int i40e_set_link_ksettings(struct net_device *netdev,
- const struct ethtool_link_ksettings *cmd)
+ const struct ethtool_link_ksettings *ks)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_aq_get_phy_abilities_resp abilities;
+ struct ethtool_link_ksettings safe_ks;
+ struct ethtool_link_ksettings copy_ks;
struct i40e_aq_set_phy_config config;
struct i40e_pf *pf = np->vsi->back;
struct i40e_vsi *vsi = np->vsi;
struct i40e_hw *hw = &pf->hw;
- struct ethtool_link_ksettings safe_cmd;
- struct ethtool_link_ksettings copy_cmd;
+ bool autoneg_changed = false;
i40e_status status = 0;
- bool change = false;
int timeout = 50;
int err = 0;
- u32 autoneg;
- u32 advertise;
- u32 tmp;
+ u8 autoneg;
/* Changing port settings is not supported if this isn't the
* port's controlling PF
i40e_partition_setting_complaint(pf);
return -EOPNOTSUPP;
}
-
if (vsi != pf->vsi[pf->lan_vsi])
return -EOPNOTSUPP;
-
if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET &&
hw->phy.media_type != I40E_MEDIA_TYPE_FIBER &&
hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE &&
hw->phy.media_type != I40E_MEDIA_TYPE_DA &&
hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
return -EOPNOTSUPP;
-
if (hw->device_id == I40E_DEV_ID_KX_B ||
hw->device_id == I40E_DEV_ID_KX_C ||
hw->device_id == I40E_DEV_ID_20G_KR2 ||
return -EOPNOTSUPP;
}
- /* copy the cmd to copy_cmd to avoid modifying the origin */
- memcpy(©_cmd, cmd, sizeof(struct ethtool_link_ksettings));
+ /* copy the ksettings to copy_ks to avoid modifying the origin */
+ memcpy(©_ks, ks, sizeof(struct ethtool_link_ksettings));
- /* get our own copy of the bits to check against */
- memset(&safe_cmd, 0, sizeof(struct ethtool_link_ksettings));
- i40e_get_link_ksettings(netdev, &safe_cmd);
+ /* save autoneg out of ksettings */
+ autoneg = copy_ks.base.autoneg;
- /* save autoneg and speed out of cmd */
- autoneg = cmd->base.autoneg;
- ethtool_convert_link_mode_to_legacy_u32(&advertise,
- cmd->link_modes.advertising);
+ memset(&safe_ks, 0, sizeof(safe_ks));
+ /* Get link modes supported by hardware and check against modes
+ * requested by the user. Return an error if unsupported mode was set.
+ */
+ i40e_phy_type_to_ethtool(pf, &safe_ks);
+ if (!bitmap_subset(copy_ks.link_modes.advertising,
+ safe_ks.link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS))
+ return -EINVAL;
- /* set autoneg and speed back to what they currently are */
- copy_cmd.base.autoneg = safe_cmd.base.autoneg;
- ethtool_convert_link_mode_to_legacy_u32(
- &tmp, safe_cmd.link_modes.advertising);
- ethtool_convert_legacy_u32_to_link_mode(
- copy_cmd.link_modes.advertising, tmp);
+ /* get our own copy of the bits to check against */
+ memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+ safe_ks.base.cmd = copy_ks.base.cmd;
+ safe_ks.base.link_mode_masks_nwords =
+ copy_ks.base.link_mode_masks_nwords;
+ i40e_get_link_ksettings(netdev, &safe_ks);
- copy_cmd.base.cmd = safe_cmd.base.cmd;
+ /* set autoneg back to what it currently is */
+ copy_ks.base.autoneg = safe_ks.base.autoneg;
- /* If copy_cmd and safe_cmd are not the same now, then they are
- * trying to set something that we do not support
+ /* If copy_ks.base and safe_ks.base are not the same now, then they are
+ * trying to set something that we do not support.
*/
- if (memcmp(©_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings)))
+ if (memcmp(©_ks.base, &safe_ks.base,
+ sizeof(struct ethtool_link_settings)))
return -EOPNOTSUPP;
while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
/* If autoneg was not already enabled */
if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
/* If autoneg is not supported, return error */
- if (!ethtool_link_ksettings_test_link_mode(
- &safe_cmd, supported, Autoneg)) {
+ if (!ethtool_link_ksettings_test_link_mode(&safe_ks,
+ supported,
+ Autoneg)) {
netdev_info(netdev, "Autoneg not supported on this phy\n");
err = -EINVAL;
goto done;
/* Autoneg is allowed to change */
config.abilities = abilities.abilities |
I40E_AQ_PHY_ENABLE_AN;
- change = true;
+ autoneg_changed = true;
}
} else {
/* If autoneg is currently enabled */
/* If autoneg is supported 10GBASE_T is the only PHY
* that can disable it, so otherwise return error
*/
- if (ethtool_link_ksettings_test_link_mode(
- &safe_cmd, supported, Autoneg) &&
+ if (ethtool_link_ksettings_test_link_mode(&safe_ks,
+ supported,
+ Autoneg) &&
hw->phy.link_info.phy_type !=
I40E_PHY_TYPE_10GBASE_T) {
netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
/* Autoneg is allowed to change */
config.abilities = abilities.abilities &
~I40E_AQ_PHY_ENABLE_AN;
- change = true;
+ autoneg_changed = true;
}
}
- ethtool_convert_link_mode_to_legacy_u32(&tmp,
- safe_cmd.link_modes.supported);
- if (advertise & ~tmp) {
- err = -EINVAL;
- goto done;
- }
-
- if (advertise & ADVERTISED_100baseT_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 100baseT_Full))
config.link_speed |= I40E_LINK_SPEED_100MB;
- if (advertise & ADVERTISED_1000baseT_Full ||
- advertise & ADVERTISED_1000baseKX_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseT_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseX_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseKX_Full))
config.link_speed |= I40E_LINK_SPEED_1GB;
- if (advertise & ADVERTISED_10000baseT_Full ||
- advertise & ADVERTISED_10000baseKX4_Full ||
- advertise & ADVERTISED_10000baseKR_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseT_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseKX4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseKR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseCR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseSR_Full))
config.link_speed |= I40E_LINK_SPEED_10GB;
- if (advertise & ADVERTISED_20000baseKR2_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 20000baseKR2_Full))
config.link_speed |= I40E_LINK_SPEED_20GB;
- if (advertise & ADVERTISED_40000baseKR4_Full ||
- advertise & ADVERTISED_40000baseCR4_Full ||
- advertise & ADVERTISED_40000baseSR4_Full ||
- advertise & ADVERTISED_40000baseLR4_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseCR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseKR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseSR_Full))
+ config.link_speed |= I40E_LINK_SPEED_25GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseKR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseCR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseSR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseLR4_Full))
config.link_speed |= I40E_LINK_SPEED_40GB;
/* If speed didn't get set, set it to what it currently is.
*/
if (!config.link_speed)
config.link_speed = abilities.link_speed;
-
- if (change || (abilities.link_speed != config.link_speed)) {
+ if (autoneg_changed || abilities.link_speed != config.link_speed) {
/* copy over the rest of the abilities */
config.phy_type = abilities.phy_type;
config.phy_type_ext = abilities.phy_type_ext;
/* make the aq call */
status = i40e_aq_set_phy_config(hw, &config, NULL);
if (status) {
- netdev_info(netdev, "Set phy config failed, err %s aq_err %s\n",
+ netdev_info(netdev,
+ "Set phy config failed, err %s aq_err %s\n",
i40e_stat_str(hw, status),
i40e_aq_str(hw, hw->aq.asq_last_status));
err = -EAGAIN;
status = i40e_update_link_info(hw);
if (status)
- netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n",
+ netdev_dbg(netdev,
+ "Updating link info failed with err %s aq_err %s\n",
i40e_stat_str(hw, status),
i40e_aq_str(hw, hw->aq.asq_last_status));
}
rcu_read_lock();
for (j = 0; j < vsi->num_queue_pairs; j++) {
- tx_ring = ACCESS_ONCE(vsi->tx_rings[j]);
+ tx_ring = READ_ONCE(vsi->tx_rings[j]);
if (!tx_ring)
continue;
if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
pf->led_status = i40e_led_get(hw);
} else {
- i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL);
+ if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+ i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL,
+ NULL);
ret = i40e_led_get_phy(hw, &temp_status,
&pf->phy_led_val);
pf->led_status = temp_status;
ret = i40e_led_set_phy(hw, false, pf->led_status,
(pf->phy_led_val |
I40E_PHY_LED_MODE_ORIG));
- i40e_aq_set_phy_debug(hw, 0, NULL);
+ if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+ i40e_aq_set_phy_debug(hw, 0, NULL);
}
break;
default:
ec->tx_max_coalesced_frames_irq = vsi->work_limit;
ec->rx_max_coalesced_frames_irq = vsi->work_limit;
- /* rx and tx usecs has per queue value. If user doesn't specify the queue,
- * return queue 0's value to represent.
+ /* rx and tx usecs has per queue value. If user doesn't specify the
+ * queue, return queue 0's value to represent.
*/
- if (queue < 0) {
+ if (queue < 0)
queue = 0;
- } else if (queue >= vsi->num_queue_pairs) {
+ else if (queue >= vsi->num_queue_pairs)
return -EINVAL;
- }
rx_ring = vsi->rx_rings[queue];
tx_ring = vsi->tx_rings[queue];
ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
-
/* we use the _usecs_high to store/set the interrupt rate limit
* that the hardware supports, that almost but not quite
* fits the original intent of the ethtool variable,
*
* Change the ITR settings for a specific queue.
**/
-
static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
struct ethtool_coalesce *ec,
int queue)
vsi->int_rate_limit);
}
- /* rx and tx usecs has per queue value. If user doesn't specify the queue,
- * apply to all queues.
+ /* rx and tx usecs has per queue value. If user doesn't specify the
+ * queue, apply to all queues.
*/
if (queue < 0) {
for (i = 0; i < vsi->num_queue_pairs; i++)
switch (cmd->cmd) {
case ETHTOOL_GRXRINGS:
- cmd->data = vsi->num_queue_pairs;
+ cmd->data = vsi->rss_size;
ret = 0;
break;
case ETHTOOL_GRXFH:
if (vsi->type != I40E_VSI_MAIN)
return -EINVAL;
+ /* We do not support setting channels via ethtool when TCs are
+ * configured through mqprio
+ */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return -EINVAL;
+
/* verify they are not requesting separate vectors */
if (!count || ch->rx_count || ch->tx_count)
return -EINVAL;
return I40E_HLUT_ARRAY_SIZE;
}
+ /**
+ * i40e_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware. Returns 0 on
+ * success.
+ **/
static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- u64 orig_flags, new_flags, changed_flags;
+ u32 orig_flags, new_flags, changed_flags;
u32 i, j;
orig_flags = READ_ONCE(pf->flags);
return -EOPNOTSUPP;
/* Compare and exchange the new flags into place. If we failed, that
- * is if cmpxchg64 returns anything but the old value, this means that
+ * is if cmpxchg returns anything but the old value, this means that
* something else has modified the flags variable since we copied it
* originally. We'll just punt with an error and log something in the
* message buffer.
*/
- if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
+ if (cmpxchg(&pf->flags, orig_flags, new_flags) != orig_flags) {
dev_warn(&pf->pdev->dev,
"Unable to update pf->flags as it was modified by another thread...\n");
return -EAGAIN;
sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags,
- NULL);
+ 0, NULL);
if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
dev_info(&pf->pdev->dev,
"couldn't set switch config bits, err %s aq_err %s\n",
/* Issue reset to cause things to take effect, as additional bits
* are added we will need to create a mask of bits requiring reset
*/
- if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) ||
- ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev)))
+ if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
+ I40E_FLAG_LEGACY_RX |
+ I40E_FLAG_SOURCE_PRUNING_DISABLED))
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
return 0;
}
+ /**
+ * i40e_get_module_info - get (Q)SFP+ module type info
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ **/
+ static int i40e_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+ {
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ u32 sff8472_comp = 0;
+ u32 sff8472_swap = 0;
+ u32 sff8636_rev = 0;
+ i40e_status status;
+ u32 type = 0;
+
+ /* Check if firmware supports reading module EEPROM. */
+ if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) {
+ netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n");
+ return -EINVAL;
+ }
+
+ status = i40e_update_link_info(hw);
+ if (status)
+ return -EIO;
+
+ if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) {
+ netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n");
+ return -EINVAL;
+ }
+
+ type = hw->phy.link_info.module_type[0];
+
+ switch (type) {
+ case I40E_MODULE_TYPE_SFP:
+ status = i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+ I40E_I2C_EEPROM_DEV_ADDR,
+ I40E_MODULE_SFF_8472_COMP,
+ &sff8472_comp, NULL);
+ if (status)
+ return -EIO;
+
+ status = i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+ I40E_I2C_EEPROM_DEV_ADDR,
+ I40E_MODULE_SFF_8472_SWAP,
+ &sff8472_swap, NULL);
+ if (status)
+ return -EIO;
+
+ /* Check if the module requires address swap to access
+ * the other EEPROM memory page.
+ */
+ if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) {
+ netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n");
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else if (sff8472_comp == 0x00) {
+ /* Module is not SFF-8472 compliant */
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ }
+ break;
+ case I40E_MODULE_TYPE_QSFP_PLUS:
+ /* Read from memory page 0. */
+ status = i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+ 0,
+ I40E_MODULE_REVISION_ADDR,
+ &sff8636_rev, NULL);
+ if (status)
+ return -EIO;
+ /* Determine revision compliance byte */
+ if (sff8636_rev > 0x02) {
+ /* Module is SFF-8636 compliant */
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+ }
+ break;
+ case I40E_MODULE_TYPE_QSFP28:
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+ break;
+ default:
+ netdev_err(vsi->netdev, "Module type unrecognized\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ /**
+ * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ **/
+ static int i40e_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
+ {
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ bool is_sfp = false;
+ i40e_status status;
+ u32 value = 0;
+ int i;
+
+ if (!ee || !ee->len || !data)
+ return -EINVAL;
+
+ if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP)
+ is_sfp = true;
+
+ for (i = 0; i < ee->len; i++) {
+ u32 offset = i + ee->offset;
+ u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0;
+
+ /* Check if we need to access the other memory page */
+ if (is_sfp) {
+ if (offset >= ETH_MODULE_SFF_8079_LEN) {
+ offset -= ETH_MODULE_SFF_8079_LEN;
+ addr = I40E_I2C_EEPROM_DEV_ADDR2;
+ }
+ } else {
+ while (offset >= ETH_MODULE_SFF_8436_LEN) {
+ /* Compute memory page number and offset. */
+ offset -= ETH_MODULE_SFF_8436_LEN / 2;
+ addr++;
+ }
+ }
+
+ status = i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+ addr, offset, &value, NULL);
+ if (status)
+ return -EIO;
+ data[i] = value;
+ }
+ return 0;
+ }
+
static const struct ethtool_ops i40e_ethtool_ops = {
.get_drvinfo = i40e_get_drvinfo,
.get_regs_len = i40e_get_regs_len,
.set_rxfh = i40e_set_rxfh,
.get_channels = i40e_get_channels,
.set_channels = i40e_set_channels,
+ .get_module_info = i40e_get_module_info,
+ .get_module_eeprom = i40e_get_module_eeprom,
.get_ts_info = i40e_get_ts_info,
.get_priv_flags = i40e_get_priv_flags,
.set_priv_flags = i40e_set_priv_flags,
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
static void i40e_fdir_sb_setup(struct i40e_pf *pf);
static int i40e_veb_get_bw_info(struct i40e_veb *veb);
+ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
+ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
+ static int i40e_get_capabilities(struct i40e_pf *pf,
+ enum i40e_admin_queue_opc list_type);
+
/* i40e_pci_tbl - PCI Device ID Table
*
u64 bytes, packets;
unsigned int start;
- tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
+ tx_ring = READ_ONCE(vsi->tx_rings[i]);
if (!tx_ring)
continue;
i40e_get_netdev_stats_struct_tx(tx_ring, stats);
*stat = (u32)((new_data + BIT_ULL(32)) - *offset);
}
+ /**
+ * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
+ * @hw: ptr to the hardware info
+ * @reg: the hw reg to read and clear
+ * @stat: ptr to the stat
+ **/
+ static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
+ {
+ u32 new_data = rd32(hw, reg);
+
+ wr32(hw, reg, 1); /* must write a nonzero value to clear register */
+ *stat += new_data;
+ }
+
/**
* i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
* @vsi: the VSI to be updated
rcu_read_lock();
for (q = 0; q < vsi->num_queue_pairs; q++) {
/* locate Tx ring */
- p = ACCESS_ONCE(vsi->tx_rings[q]);
+ p = READ_ONCE(vsi->tx_rings[q]);
do {
start = u64_stats_fetch_begin_irq(&p->syncp);
&osd->rx_jabber, &nsd->rx_jabber);
/* FDIR stats */
- i40e_stat_update32(hw,
- I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)),
- pf->stat_offsets_loaded,
- &osd->fd_atr_match, &nsd->fd_atr_match);
- i40e_stat_update32(hw,
- I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)),
- pf->stat_offsets_loaded,
- &osd->fd_sb_match, &nsd->fd_sb_match);
- i40e_stat_update32(hw,
- I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)),
- pf->stat_offsets_loaded,
- &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match);
+ i40e_stat_update_and_clear32(hw,
+ I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)),
+ &nsd->fd_atr_match);
+ i40e_stat_update_and_clear32(hw,
+ I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)),
+ &nsd->fd_sb_match);
+ i40e_stat_update_and_clear32(hw,
+ I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)),
+ &nsd->fd_atr_tunnel_match);
val = rd32(hw, I40E_PRTPM_EEE_STAT);
nsd->tx_lpi_status =
return 0;
}
+ /**
+ * i40e_config_rss_aq - Prepare for RSS using AQ commands
+ * @vsi: vsi structure
+ * @seed: RSS hash seed
+ **/
+ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+ u8 *lut, u16 lut_size)
+ {
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ int ret = 0;
+
+ if (seed) {
+ struct i40e_aqc_get_set_rss_key_data *seed_dw =
+ (struct i40e_aqc_get_set_rss_key_data *)seed;
+ ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Cannot set RSS key, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+ }
+ if (lut) {
+ bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
+
+ ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Cannot set RSS lut, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
+ * @vsi: VSI structure
+ **/
+ static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
+ {
+ struct i40e_pf *pf = vsi->back;
+ u8 seed[I40E_HKEY_ARRAY_SIZE];
+ u8 *lut;
+ int ret;
+
+ if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
+ return 0;
+ if (!vsi->rss_size)
+ vsi->rss_size = min_t(int, pf->alloc_rss_size,
+ vsi->num_queue_pairs);
+ if (!vsi->rss_size)
+ return -EINVAL;
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ /* Use the user configured hash keys and lookup table if there is one,
+ * otherwise use default
+ */
+ if (vsi->rss_lut_user)
+ memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+ else
+ i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+ if (vsi->rss_hkey_user)
+ memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+ else
+ netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+ ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+ kfree(lut);
+ return ret;
+ }
+
+ /**
+ * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
+ * @vsi: the VSI being configured,
+ * @ctxt: VSI context structure
+ * @enabled_tc: number of traffic classes to enable
+ *
+ * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+ **/
+ static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
+ struct i40e_vsi_context *ctxt,
+ u8 enabled_tc)
+ {
+ u16 qcount = 0, max_qcount, qmap, sections = 0;
+ int i, override_q, pow, num_qps, ret;
+ u8 netdev_tc = 0, offset = 0;
+
+ if (vsi->type != I40E_VSI_MAIN)
+ return -EINVAL;
+ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+ sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+ vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
+ vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+ num_qps = vsi->mqprio_qopt.qopt.count[0];
+
+ /* find the next higher power-of-2 of num queue pairs */
+ pow = ilog2(num_qps);
+ if (!is_power_of_2(num_qps))
+ pow++;
+ qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+ (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+ /* Setup queue offset/count for all TCs for given VSI */
+ max_qcount = vsi->mqprio_qopt.qopt.count[0];
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ /* See if the given TC is enabled for the given VSI */
+ if (vsi->tc_config.enabled_tc & BIT(i)) {
+ offset = vsi->mqprio_qopt.qopt.offset[i];
+ qcount = vsi->mqprio_qopt.qopt.count[i];
+ if (qcount > max_qcount)
+ max_qcount = qcount;
+ vsi->tc_config.tc_info[i].qoffset = offset;
+ vsi->tc_config.tc_info[i].qcount = qcount;
+ vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
+ } else {
+ /* TC is not enabled so set the offset to
+ * default queue and allocate one queue
+ * for the given TC.
+ */
+ vsi->tc_config.tc_info[i].qoffset = 0;
+ vsi->tc_config.tc_info[i].qcount = 1;
+ vsi->tc_config.tc_info[i].netdev_tc = 0;
+ }
+ }
+
+ /* Set actual Tx/Rx queue pairs */
+ vsi->num_queue_pairs = offset + qcount;
+
+ /* Setup queue TC[0].qmap for given VSI context */
+ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+ ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+ ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+ ctxt->info.valid_sections |= cpu_to_le16(sections);
+
+ /* Reconfigure RSS for main VSI with max queue count */
+ vsi->rss_size = max_qcount;
+ ret = i40e_vsi_config_rss(vsi);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Failed to reconfig rss for num_queues (%u)\n",
+ max_qcount);
+ return ret;
+ }
+ vsi->reconfig_rss = true;
+ dev_dbg(&vsi->back->pdev->dev,
+ "Reconfigured rss with num_queues (%u)\n", max_qcount);
+
+ /* Find queue count available for channel VSIs and starting offset
+ * for channel VSIs
+ */
+ override_q = vsi->mqprio_qopt.qopt.count[0];
+ if (override_q && override_q < vsi->num_queue_pairs) {
+ vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
+ vsi->next_base_queue = override_q;
+ }
+ return 0;
+ }
+
/**
* i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
* @vsi: the VSI being setup
numtc = 1;
}
} else {
- /* At least TC0 is enabled in case of non-DCB case */
+ /* At least TC0 is enabled in non-DCB, non-MQPRIO case */
numtc = 1;
}
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
}
-
- /* schedule our worker thread which will take care of
- * applying the new filter changes
- */
- i40e_service_event_schedule(vsi->back);
}
/**
**/
static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
{
- struct i40e_vsi *vsi = ring->vsi;
+ int cpu;
- if (!ring->q_vector || !ring->netdev)
+ if (!ring->q_vector || !ring->netdev || ring->ch)
return;
- if ((vsi->tc_config.numtc <= 1) &&
- !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) {
- netif_set_xps_queue(ring->netdev,
- get_cpu_mask(ring->q_vector->v_idx),
- ring->queue_index);
- }
+ /* We only initialize XPS once, so as not to overwrite user settings */
+ if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
+ return;
- /* schedule our worker thread which will take care of
- * applying the new filter changes
- */
- i40e_service_event_schedule(vsi->back);
+ cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
+ netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
+ ring->queue_index);
}
/**
* initialization. This has to be done regardless of
* DCB as by default everything is mapped to TC0.
*/
- tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+
+ if (ring->ch)
+ tx_ctx.rdylist =
+ le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
+
+ else
+ tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+
tx_ctx.rdylist_act = 0;
/* clear the context in the HMC */
}
/* Now associate this queue with this PCI function */
- if (vsi->type == I40E_VSI_VMDQ2) {
- qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
- qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
- I40E_QTX_CTL_VFVM_INDX_MASK;
+ if (ring->ch) {
+ if (ring->ch->type == I40E_VSI_VMDQ2)
+ qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+ else
+ return -EINVAL;
+
+ qtx_ctl |= (ring->ch->vsi_number <<
+ I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+ I40E_QTX_CTL_VFVM_INDX_MASK;
} else {
- qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+ if (vsi->type == I40E_VSI_VMDQ2) {
+ qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+ qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+ I40E_QTX_CTL_VFVM_INDX_MASK;
+ } else {
+ qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+ }
}
qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
struct i40e_hmc_obj_rxq rx_ctx;
i40e_status err = 0;
- ring->state = 0;
+ bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
/* clear the context structure first */
memset(&rx_ctx, 0, sizeof(rx_ctx));
if (hw->revision_id == 0)
rx_ctx.lrxqthresh = 0;
else
- rx_ctx.lrxqthresh = 2;
+ rx_ctx.lrxqthresh = 1;
rx_ctx.crcstrip = 1;
rx_ctx.l2tsel = 1;
/* this controls whether VLAN is stripped from inner headers */
rx_ring->dcb_tc = 0;
tx_ring->dcb_tc = 0;
}
+ return;
}
for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
/**
* i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
* @pf: board private structure
- * @clearpba: true when all pending interrupt events should be cleared
**/
- void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba)
+ void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
u32 val;
val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
- (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) |
+ I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
(I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
wr32(hw, I40E_PFINT_DYN_CTL0, val);
int tx_int_idx = 0;
int vector, err;
int irq_num;
+ int cpu;
for (vector = 0; vector < q_vectors; vector++) {
struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
q_vector->affinity_notify.release = i40e_irq_affinity_release;
irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
- /* get_cpu_mask returns a static constant mask with
- * a permanent lifetime so it's ok to use here.
+ /* Spread affinity hints out across online CPUs.
+ *
+ * get_cpu_mask returns a static constant mask with
+ * a permanent lifetime so it's ok to pass to
+ * irq_set_affinity_hint without making a copy.
*/
- irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
+ cpu = cpumask_local_spread(q_vector->v_idx, -1);
+ irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
}
vsi->irqs_ready = true;
for (i = 0; i < vsi->num_q_vectors; i++)
i40e_irq_dynamic_enable(vsi, i);
} else {
- i40e_irq_dynamic_enable_icr0(pf, true);
+ i40e_irq_dynamic_enable_icr0(pf);
}
i40e_flush(&pf->hw);
}
/**
- * i40e_stop_misc_vector - Stop the vector that handles non-queue events
+ * i40e_free_misc_vector - Free the vector that handles non-queue events
* @pf: board private structure
**/
- static void i40e_stop_misc_vector(struct i40e_pf *pf)
+ static void i40e_free_misc_vector(struct i40e_pf *pf)
{
/* Disable ICR 0 */
wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
i40e_flush(&pf->hw);
+
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
+ synchronize_irq(pf->msix_entries[0].vector);
+ free_irq(pf->msix_entries[0].vector, pf);
+ clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
+ }
}
/**
wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
if (!test_bit(__I40E_DOWN, pf->state)) {
i40e_service_event_schedule(pf);
- i40e_irq_dynamic_enable_icr0(pf, false);
+ i40e_irq_dynamic_enable_icr0(pf);
}
return ret;
{
int i;
- i40e_stop_misc_vector(pf);
- if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
- synchronize_irq(pf->msix_entries[0].vector);
- free_irq(pf->msix_entries[0].vector, pf);
- }
+ i40e_free_misc_vector(pf);
i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
I40E_IWARP_IRQ_PILE_ID);
return enabled_tc;
}
+ /**
+ * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
+ * @pf: PF being queried
+ *
+ * Query the current MQPRIO configuration and return the number of
+ * traffic classes enabled.
+ **/
+ static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
+ {
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+ u8 num_tc = vsi->mqprio_qopt.qopt.num_tc;
+ u8 enabled_tc = 1, i;
+
+ for (i = 1; i < num_tc; i++)
+ enabled_tc |= BIT(i);
+ return enabled_tc;
+ }
+
/**
* i40e_pf_get_num_tc - Get enabled traffic classes for PF
* @pf: PF being queried
u8 num_tc = 0;
struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
- /* If DCB is not enabled then always in single TC */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
+
+ /* If neither MQPRIO nor DCB is enabled, then always use single TC */
if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
return 1;
**/
static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
{
- /* If DCB is not enabled for this PF then just return default TC */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return i40e_mqprio_get_enabled_tc(pf);
+
+ /* If neither MQPRIO nor DCB is enabled for this PF then just return
+ * default TC
+ */
if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
return I40E_DEFAULT_TRAFFIC_CLASS;
i40e_status ret;
int i;
+ if (vsi->back->flags & I40E_FLAG_TC_MQPRIO)
+ return 0;
+ if (!vsi->mqprio_qopt.qopt.hw) {
+ ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+ if (ret)
+ dev_info(&vsi->back->pdev->dev,
+ "Failed to reset tx rate for vsi->seid %u\n",
+ vsi->seid);
+ return ret;
+ }
bw_data.tc_valid_bits = enabled_tc;
for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
bw_data.tc_bw_credits[i] = bw_share[i];
vsi->tc_config.tc_info[i].qoffset);
}
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return;
+
/* Assign UP2TC map for the VSI */
for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
/* Get the actual TC# for the UP */
int i;
/* Check if enabled_tc is same as existing or new TCs */
- if (vsi->tc_config.enabled_tc == enabled_tc)
+ if (vsi->tc_config.enabled_tc == enabled_tc &&
+ vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
return ret;
/* Enable ETS TCs with equal BW Share for now across all VSIs */
ctxt.vf_num = 0;
ctxt.uplink_seid = vsi->uplink_seid;
ctxt.info = vsi->info;
- i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+ if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) {
+ ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
+ if (ret)
+ goto out;
+ } else {
+ i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+ }
+ /* On destroying the qdisc, reset vsi->rss_size, as number of enabled
+ * queues changed.
+ */
+ if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
+ vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
+ vsi->num_queue_pairs);
+ ret = i40e_vsi_config_rss(vsi);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Failed to reconfig rss for num_queues\n");
+ return ret;
+ }
+ vsi->reconfig_rss = false;
+ }
if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
}
- /* Update the VSI after updating the VSI queue-mapping information */
+ /* Update the VSI after updating the VSI queue-mapping
+ * information
+ */
ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
if (ret) {
dev_info(&vsi->back->pdev->dev,
}
/**
- * i40e_veb_config_tc - Configure TCs for given VEB
- * @veb: given VEB
- * @enabled_tc: TC bitmap
+ * i40e_get_link_speed - Returns link speed for the interface
+ * @vsi: VSI to be configured
*
- * Configures given TC bitmap for VEB (switching) element
**/
- int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+ int i40e_get_link_speed(struct i40e_vsi *vsi)
{
- struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
- struct i40e_pf *pf = veb->pf;
- int ret = 0;
- int i;
-
- /* No TCs or already enabled TCs just return */
- if (!enabled_tc || veb->enabled_tc == enabled_tc)
- return ret;
-
- bw_data.tc_valid_bits = enabled_tc;
- /* bw_data.absolute_credits is not set (relative) */
-
- /* Enable ETS TCs with equal BW Share for now */
- for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
- if (enabled_tc & BIT(i))
- bw_data.tc_bw_share_credits[i] = 1;
- }
-
- ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
- &bw_data, NULL);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "VEB bw config failed, err %s aq_err %s\n",
- i40e_stat_str(&pf->hw, ret),
- i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
- goto out;
- }
+ struct i40e_pf *pf = vsi->back;
- /* Update the BW information */
- ret = i40e_veb_get_bw_info(veb);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Failed getting veb bw config, err %s aq_err %s\n",
- i40e_stat_str(&pf->hw, ret),
- i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ switch (pf->hw.phy.link_info.link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ return 40000;
+ case I40E_LINK_SPEED_25GB:
+ return 25000;
+ case I40E_LINK_SPEED_20GB:
+ return 20000;
+ case I40E_LINK_SPEED_10GB:
+ return 10000;
+ case I40E_LINK_SPEED_1GB:
+ return 1000;
+ default:
+ return -EINVAL;
}
-
- out:
- return ret;
}
- #ifdef CONFIG_I40E_DCB
/**
- * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
- * @pf: PF struct
+ * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @seid: seid of the channel/VSI
+ * @max_tx_rate: max TX rate to be configured as BW limit
*
- * Reconfigure VEB/VSIs on a given PF; it is assumed that
- * the caller would've quiesce all the VSIs before calling
- * this function
+ * Helper function to set BW limit for a given VSI
**/
- static void i40e_dcb_reconfigure(struct i40e_pf *pf)
+ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
{
- u8 tc_map = 0;
- int ret;
- u8 v;
+ struct i40e_pf *pf = vsi->back;
+ u64 credits = 0;
+ int speed = 0;
+ int ret = 0;
- /* Enable the TCs available on PF to all VEBs */
- tc_map = i40e_pf_get_tc_map(pf);
- for (v = 0; v < I40E_MAX_VEB; v++) {
- if (!pf->veb[v])
+ speed = i40e_get_link_speed(vsi);
+ if (max_tx_rate > speed) {
+ dev_err(&pf->pdev->dev,
+ "Invalid max tx rate %llu specified for VSI seid %d.",
+ max_tx_rate, seid);
+ return -EINVAL;
+ }
+ if (max_tx_rate && max_tx_rate < 50) {
+ dev_warn(&pf->pdev->dev,
+ "Setting max tx rate to minimum usable value of 50Mbps.\n");
+ max_tx_rate = 50;
+ }
+
+ /* Tx rate credits are in values of 50Mbps, 0 is disabled */
+ credits = max_tx_rate;
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
+ I40E_MAX_BW_INACTIVE_ACCUM, NULL);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+ "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n",
+ max_tx_rate, seid, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return ret;
+ }
+
+ /**
+ * i40e_remove_queue_channels - Remove queue channels for the TCs
+ * @vsi: VSI to be configured
+ *
+ * Remove queue channels for the TCs
+ **/
+ static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
+ {
+ enum i40e_admin_queue_err last_aq_status;
+ struct i40e_cloud_filter *cfilter;
+ struct i40e_channel *ch, *ch_tmp;
+ struct i40e_pf *pf = vsi->back;
+ struct hlist_node *node;
+ int ret, i;
+
+ /* Reset rss size that was stored when reconfiguring rss for
+ * channel VSIs with non-power-of-2 queue count.
+ */
+ vsi->current_rss_size = 0;
+
+ /* perform cleanup for channels if they exist */
+ if (list_empty(&vsi->ch_list))
+ return;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ struct i40e_vsi *p_vsi;
+
+ list_del(&ch->list);
+ p_vsi = ch->parent_vsi;
+ if (!p_vsi || !ch->initialized) {
+ kfree(ch);
continue;
- ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+ }
+ /* Reset queue contexts */
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+
+ pf_q = ch->base_queue + i;
+ tx_ring = vsi->tx_rings[pf_q];
+ tx_ring->ch = NULL;
+
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->ch = NULL;
+ }
+
+ /* Reset BW configured for this VSI via mqprio */
+ ret = i40e_set_bw_limit(vsi, ch->seid, 0);
+ if (ret)
+ dev_info(&vsi->back->pdev->dev,
+ "Failed to reset tx rate for ch->seid %u\n",
+ ch->seid);
+
+ /* delete cloud filters associated with this channel */
+ hlist_for_each_entry_safe(cfilter, node,
+ &pf->cloud_filter_list, cloud_node) {
+ if (cfilter->seid != ch->seid)
+ continue;
+
+ hash_del(&cfilter->cloud_node);
+ if (cfilter->dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi,
+ cfilter,
+ false);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, cfilter,
+ false);
+ last_aq_status = pf->hw.aq.asq_last_status;
+ if (ret)
+ dev_info(&pf->pdev->dev,
+ "Failed to delete cloud filter, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, last_aq_status));
+ kfree(cfilter);
+ }
+
+ /* delete VSI from FW */
+ ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+ NULL);
+ if (ret)
+ dev_err(&vsi->back->pdev->dev,
+ "unable to remove channel (%d) for parent VSI(%d)\n",
+ ch->seid, p_vsi->seid);
+ kfree(ch);
+ }
+ INIT_LIST_HEAD(&vsi->ch_list);
+ }
+
+ /**
+ * i40e_is_any_channel - channel exist or not
+ * @vsi: ptr to VSI to which channels are associated with
+ *
+ * Returns true or false if channel(s) exist for associated VSI or not
+ **/
+ static bool i40e_is_any_channel(struct i40e_vsi *vsi)
+ {
+ struct i40e_channel *ch, *ch_tmp;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ if (ch->initialized)
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+ * i40e_get_max_queues_for_channel
+ * @vsi: ptr to VSI to which channels are associated with
+ *
+ * Helper function which returns max value among the queue counts set on the
+ * channels/TCs created.
+ **/
+ static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
+ {
+ struct i40e_channel *ch, *ch_tmp;
+ int max = 0;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ if (!ch->initialized)
+ continue;
+ if (ch->num_queue_pairs > max)
+ max = ch->num_queue_pairs;
+ }
+
+ return max;
+ }
+
+ /**
+ * i40e_validate_num_queues - validate num_queues w.r.t channel
+ * @pf: ptr to PF device
+ * @num_queues: number of queues
+ * @vsi: the parent VSI
+ * @reconfig_rss: indicates should the RSS be reconfigured or not
+ *
+ * This function validates number of queues in the context of new channel
+ * which is being established and determines if RSS should be reconfigured
+ * or not for parent VSI.
+ **/
+ static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
+ struct i40e_vsi *vsi, bool *reconfig_rss)
+ {
+ int max_ch_queues;
+
+ if (!reconfig_rss)
+ return -EINVAL;
+
+ *reconfig_rss = false;
+
+ if (num_queues > I40E_MAX_QUEUES_PER_CH) {
+ dev_err(&pf->pdev->dev,
+ "Failed to create VMDq VSI. User requested num_queues (%d) > I40E_MAX_QUEUES_PER_VSI (%u)\n",
+ num_queues, I40E_MAX_QUEUES_PER_CH);
+ return -EINVAL;
+ }
+
+ if (vsi->current_rss_size) {
+ if (num_queues > vsi->current_rss_size) {
+ dev_dbg(&pf->pdev->dev,
+ "Error: num_queues (%d) > vsi's current_size(%d)\n",
+ num_queues, vsi->current_rss_size);
+ return -EINVAL;
+ } else if ((num_queues < vsi->current_rss_size) &&
+ (!is_power_of_2(num_queues))) {
+ dev_dbg(&pf->pdev->dev,
+ "Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
+ num_queues, vsi->current_rss_size);
+ return -EINVAL;
+ }
+ }
+
+ if (!is_power_of_2(num_queues)) {
+ /* Find the max num_queues configured for channel if channel
+ * exist.
+ * if channel exist, then enforce 'num_queues' to be more than
+ * max ever queues configured for channel.
+ */
+ max_ch_queues = i40e_get_max_queues_for_channel(vsi);
+ if (num_queues < max_ch_queues) {
+ dev_dbg(&pf->pdev->dev,
+ "Error: num_queues (%d) < max queues configured for channel(%d)\n",
+ num_queues, max_ch_queues);
+ return -EINVAL;
+ }
+ *reconfig_rss = true;
+ }
+
+ return 0;
+ }
+
+ /**
+ * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size
+ * @vsi: the VSI being setup
+ * @rss_size: size of RSS, accordingly LUT gets reprogrammed
+ *
+ * This function reconfigures RSS by reprogramming LUTs using 'rss_size'
+ **/
+ static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
+ {
+ struct i40e_pf *pf = vsi->back;
+ u8 seed[I40E_HKEY_ARRAY_SIZE];
+ struct i40e_hw *hw = &pf->hw;
+ int local_rss_size;
+ u8 *lut;
+ int ret;
+
+ if (!vsi->rss_size)
+ return -EINVAL;
+
+ if (rss_size > vsi->rss_size)
+ return -EINVAL;
+
+ local_rss_size = min_t(int, vsi->rss_size, rss_size);
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ /* Ignoring user configured lut if there is one */
+ i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size);
+
+ /* Use user configured hash key if there is one, otherwise
+ * use default.
+ */
+ if (vsi->rss_hkey_user)
+ memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+ else
+ netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+
+ ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Cannot set RSS lut, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ kfree(lut);
+ return ret;
+ }
+ kfree(lut);
+
+ /* Do the update w.r.t. storing rss_size */
+ if (!vsi->orig_rss_size)
+ vsi->orig_rss_size = vsi->rss_size;
+ vsi->current_rss_size = local_rss_size;
+
+ return ret;
+ }
+
+ /**
+ * i40e_channel_setup_queue_map - Setup a channel queue map
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ctxt: VSI context structure
+ * @ch: ptr to channel structure
+ *
+ * Setup queue map for a specific channel
+ **/
+ static void i40e_channel_setup_queue_map(struct i40e_pf *pf,
+ struct i40e_vsi_context *ctxt,
+ struct i40e_channel *ch)
+ {
+ u16 qcount, qmap, sections = 0;
+ u8 offset = 0;
+ int pow;
+
+ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+ sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+
+ qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix);
+ ch->num_queue_pairs = qcount;
+
+ /* find the next higher power-of-2 of num queue pairs */
+ pow = ilog2(qcount);
+ if (!is_power_of_2(qcount))
+ pow++;
+
+ qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+ (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+ /* Setup queue TC[0].qmap for given VSI context */
+ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+
+ ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */
+ ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+ ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue);
+ ctxt->info.valid_sections |= cpu_to_le16(sections);
+ }
+
+ /**
+ * i40e_add_channel - add a channel by adding VSI
+ * @pf: ptr to PF device
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @ch: ptr to channel structure
+ *
+ * Add a channel (VSI) using add_vsi and queue_map
+ **/
+ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
+ struct i40e_channel *ch)
+ {
+ struct i40e_hw *hw = &pf->hw;
+ struct i40e_vsi_context ctxt;
+ u8 enabled_tc = 0x1; /* TC0 enabled */
+ int ret;
+
+ if (ch->type != I40E_VSI_VMDQ2) {
+ dev_info(&pf->pdev->dev,
+ "add new vsi failed, ch->type %d\n", ch->type);
+ return -EINVAL;
+ }
+
+ memset(&ctxt, 0, sizeof(ctxt));
+ ctxt.pf_num = hw->pf_id;
+ ctxt.vf_num = 0;
+ ctxt.uplink_seid = uplink_seid;
+ ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+ if (ch->type == I40E_VSI_VMDQ2)
+ ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+
+ if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
+ ctxt.info.valid_sections |=
+ cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+ ctxt.info.switch_id =
+ cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+ }
+
+ /* Set queue map for a given VSI context */
+ i40e_channel_setup_queue_map(pf, &ctxt, ch);
+
+ /* Now time to create VSI */
+ ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "add new vsi failed, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return -ENOENT;
+ }
+
+ /* Success, update channel */
+ ch->enabled_tc = enabled_tc;
+ ch->seid = ctxt.seid;
+ ch->vsi_number = ctxt.vsi_number;
+ ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx);
+
+ /* copy just the sections touched not the entire info
+ * since not all sections are valid as returned by
+ * update vsi params
+ */
+ ch->info.mapping_flags = ctxt.info.mapping_flags;
+ memcpy(&ch->info.queue_mapping,
+ &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
+ memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping,
+ sizeof(ctxt.info.tc_mapping));
+
+ return 0;
+ }
+
+ static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
+ u8 *bw_share)
+ {
+ struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+ i40e_status ret;
+ int i;
+
+ bw_data.tc_valid_bits = ch->enabled_tc;
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+ bw_data.tc_bw_credits[i] = bw_share[i];
+
+ ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
+ &bw_data, NULL);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
+ vsi->back->hw.aq.asq_last_status, ch->seid);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+ ch->info.qs_handle[i] = bw_data.qs_handles[i];
+
+ return 0;
+ }
+
+ /**
+ * i40e_channel_config_tx_ring - config TX ring associated with new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Configure TX rings associated with channel (VSI) since queues are being
+ * from parent VSI.
+ **/
+ static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
+ struct i40e_vsi *vsi,
+ struct i40e_channel *ch)
+ {
+ i40e_status ret;
+ int i;
+ u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+
+ /* Enable ETS TCs with equal BW Share for now across all VSIs */
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ if (ch->enabled_tc & BIT(i))
+ bw_share[i] = 1;
+ }
+
+ /* configure BW for new VSI */
+ ret = i40e_channel_config_bw(vsi, ch, bw_share);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Failed configuring TC map %d for channel (seid %u)\n",
+ ch->enabled_tc, ch->seid);
+ return ret;
+ }
+
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+
+ pf_q = ch->base_queue + i;
+
+ /* Get to TX ring ptr of main VSI, for re-setup TX queue
+ * context
+ */
+ tx_ring = vsi->tx_rings[pf_q];
+ tx_ring->ch = ch;
+
+ /* Get the RX ring ptr */
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->ch = ch;
+ }
+
+ return 0;
+ }
+
+ /**
+ * i40e_setup_hw_channel - setup new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @type: type of channel to be created (VMDq2/VF)
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and configures TX rings accordingly
+ **/
+ static inline int i40e_setup_hw_channel(struct i40e_pf *pf,
+ struct i40e_vsi *vsi,
+ struct i40e_channel *ch,
+ u16 uplink_seid, u8 type)
+ {
+ int ret;
+
+ ch->initialized = false;
+ ch->base_queue = vsi->next_base_queue;
+ ch->type = type;
+
+ /* Proceed with creation of channel (VMDq2) VSI */
+ ret = i40e_add_channel(pf, uplink_seid, ch);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "failed to add_channel using uplink_seid %u\n",
+ uplink_seid);
+ return ret;
+ }
+
+ /* Mark the successful creation of channel */
+ ch->initialized = true;
+
+ /* Reconfigure TX queues using QTX_CTL register */
+ ret = i40e_channel_config_tx_ring(pf, vsi, ch);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "failed to configure TX rings for channel %u\n",
+ ch->seid);
+ return ret;
+ }
+
+ /* update 'next_base_queue' */
+ vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs;
+ dev_dbg(&pf->pdev->dev,
+ "Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n",
+ ch->seid, ch->vsi_number, ch->stat_counter_idx,
+ ch->num_queue_pairs,
+ vsi->next_base_queue);
+ return ret;
+ }
+
+ /**
+ * i40e_setup_channel - setup new channel using uplink element
+ * @pf: ptr to PF device
+ * @type: type of channel to be created (VMDq2/VF)
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @ch: ptr to channel structure
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and uplink switching element (uplink_seid)
+ **/
+ static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi,
+ struct i40e_channel *ch)
+ {
+ u8 vsi_type;
+ u16 seid;
+ int ret;
+
+ if (vsi->type == I40E_VSI_MAIN) {
+ vsi_type = I40E_VSI_VMDQ2;
+ } else {
+ dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
+ vsi->type);
+ return false;
+ }
+
+ /* underlying switching element */
+ seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+
+ /* create channel (VSI), configure TX rings */
+ ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type);
+ if (ret) {
+ dev_err(&pf->pdev->dev, "failed to setup hw_channel\n");
+ return false;
+ }
+
+ return ch->initialized ? true : false;
+ }
+
+ /**
+ * i40e_validate_and_set_switch_mode - sets up switch mode correctly
+ * @vsi: ptr to VSI which has PF backing
+ *
+ * Sets up switch mode correctly if it needs to be changed and perform
+ * what are allowed modes.
+ **/
+ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
+ {
+ u8 mode;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ int ret;
+
+ ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities);
+ if (ret)
+ return -EINVAL;
+
+ if (hw->dev_caps.switch_mode) {
+ /* if switch mode is set, support mode2 (non-tunneled for
+ * cloud filter) for now
+ */
+ u32 switch_mode = hw->dev_caps.switch_mode &
+ I40E_SWITCH_MODE_MASK;
+ if (switch_mode >= I40E_CLOUD_FILTER_MODE1) {
+ if (switch_mode == I40E_CLOUD_FILTER_MODE2)
+ return 0;
+ dev_err(&pf->pdev->dev,
+ "Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n",
+ hw->dev_caps.switch_mode);
+ return -EINVAL;
+ }
+ }
+
+ /* Set Bit 7 to be valid */
+ mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
+
+ /* Set L4type to both TCP and UDP support */
+ mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH;
+
+ /* Set cloud filter mode */
+ mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
+
+ /* Prep mode field for set_switch_config */
+ ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags,
+ pf->last_sw_conf_valid_flags,
+ mode, NULL);
+ if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH)
+ dev_err(&pf->pdev->dev,
+ "couldn't set switch config bits, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw,
+ hw->aq.asq_last_status));
+
+ return ret;
+ }
+
+ /**
+ * i40e_create_queue_channel - function to create channel
+ * @vsi: VSI to be configured
+ * @ch: ptr to channel (it contains channel specific params)
+ *
+ * This function creates channel (VSI) using num_queues specified by user,
+ * reconfigs RSS if needed.
+ **/
+ int i40e_create_queue_channel(struct i40e_vsi *vsi,
+ struct i40e_channel *ch)
+ {
+ struct i40e_pf *pf = vsi->back;
+ bool reconfig_rss;
+ int err;
+
+ if (!ch)
+ return -EINVAL;
+
+ if (!ch->num_queue_pairs) {
+ dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
+ ch->num_queue_pairs);
+ return -EINVAL;
+ }
+
+ /* validate user requested num_queues for channel */
+ err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
+ &reconfig_rss);
+ if (err) {
+ dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
+ ch->num_queue_pairs);
+ return -EINVAL;
+ }
+
+ /* By default we are in VEPA mode, if this is the first VF/VMDq
+ * VSI to be added switch to VEB mode.
+ */
+ if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) ||
+ (!i40e_is_any_channel(vsi))) {
+ if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) {
+ dev_dbg(&pf->pdev->dev,
+ "Failed to create channel. Override queues (%u) not power of 2\n",
+ vsi->tc_config.tc_info[0].qcount);
+ return -EINVAL;
+ }
+
+ if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+ pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+
+ if (vsi->type == I40E_VSI_MAIN) {
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG,
+ true);
+ else
+ i40e_do_reset_safe(pf,
+ I40E_PF_RESET_FLAG);
+ }
+ }
+ /* now onwards for main VSI, number of queues will be value
+ * of TC0's queue count
+ */
+ }
+
+ /* By this time, vsi->cnt_q_avail shall be set to non-zero and
+ * it should be more than num_queues
+ */
+ if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
+ dev_dbg(&pf->pdev->dev,
+ "Error: cnt_q_avail (%u) less than num_queues %d\n",
+ vsi->cnt_q_avail, ch->num_queue_pairs);
+ return -EINVAL;
+ }
+
+ /* reconfig_rss only if vsi type is MAIN_VSI */
+ if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
+ err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
+ if (err) {
+ dev_info(&pf->pdev->dev,
+ "Error: unable to reconfig rss for num_queues (%u)\n",
+ ch->num_queue_pairs);
+ return -EINVAL;
+ }
+ }
+
+ if (!i40e_setup_channel(pf, vsi, ch)) {
+ dev_info(&pf->pdev->dev, "Failed to setup channel\n");
+ return -EINVAL;
+ }
+
+ dev_info(&pf->pdev->dev,
+ "Setup channel (id:%u) utilizing num_queues %d\n",
+ ch->seid, ch->num_queue_pairs);
+
+ /* configure VSI for BW limit */
+ if (ch->max_tx_rate) {
+ u64 credits = ch->max_tx_rate;
+
+ if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
+ return -EINVAL;
+
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ dev_dbg(&pf->pdev->dev,
+ "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+ ch->max_tx_rate,
+ credits,
+ ch->seid);
+ }
+
+ /* in case of VF, this will be main SRIOV VSI */
+ ch->parent_vsi = vsi;
+
+ /* and update main_vsi's count for queue_available to use */
+ vsi->cnt_q_avail -= ch->num_queue_pairs;
+
+ return 0;
+ }
+
+ /**
+ * i40e_configure_queue_channels - Add queue channel for the given TCs
+ * @vsi: VSI to be configured
+ *
+ * Configures queue channel mapping to the given TCs
+ **/
+ static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
+ {
+ struct i40e_channel *ch;
+ u64 max_rate = 0;
+ int ret = 0, i;
+
+ /* Create app vsi with the TCs. Main VSI with TC0 is already set up */
+ vsi->tc_seid_map[0] = vsi->seid;
+ for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ if (vsi->tc_config.enabled_tc & BIT(i)) {
+ ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+ if (!ch) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ INIT_LIST_HEAD(&ch->list);
+ ch->num_queue_pairs =
+ vsi->tc_config.tc_info[i].qcount;
+ ch->base_queue =
+ vsi->tc_config.tc_info[i].qoffset;
+
+ /* Bandwidth limit through tc interface is in bytes/s,
+ * change to Mbit/s
+ */
+ max_rate = vsi->mqprio_qopt.max_rate[i];
+ do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+ ch->max_tx_rate = max_rate;
+
+ list_add_tail(&ch->list, &vsi->ch_list);
+
+ ret = i40e_create_queue_channel(vsi, ch);
+ if (ret) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed creating queue channel with TC%d: queues %d\n",
+ i, ch->num_queue_pairs);
+ goto err_free;
+ }
+ vsi->tc_seid_map[i] = ch->seid;
+ }
+ }
+ return ret;
+
+ err_free:
+ i40e_remove_queue_channels(vsi);
+ return ret;
+ }
+
+ /**
+ * i40e_veb_config_tc - Configure TCs for given VEB
+ * @veb: given VEB
+ * @enabled_tc: TC bitmap
+ *
+ * Configures given TC bitmap for VEB (switching) element
+ **/
+ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+ {
+ struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
+ struct i40e_pf *pf = veb->pf;
+ int ret = 0;
+ int i;
+
+ /* No TCs or already enabled TCs just return */
+ if (!enabled_tc || veb->enabled_tc == enabled_tc)
+ return ret;
+
+ bw_data.tc_valid_bits = enabled_tc;
+ /* bw_data.absolute_credits is not set (relative) */
+
+ /* Enable ETS TCs with equal BW Share for now */
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ if (enabled_tc & BIT(i))
+ bw_data.tc_bw_share_credits[i] = 1;
+ }
+
+ ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
+ &bw_data, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "VEB bw config failed, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto out;
+ }
+
+ /* Update the BW information */
+ ret = i40e_veb_get_bw_info(veb);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Failed getting veb bw config, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ }
+
+ out:
+ return ret;
+ }
+
+ #ifdef CONFIG_I40E_DCB
+ /**
+ * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
+ * @pf: PF struct
+ *
+ * Reconfigure VEB/VSIs on a given PF; it is assumed that
+ * the caller would've quiesce all the VSIs before calling
+ * this function
+ **/
+ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
+ {
+ u8 tc_map = 0;
+ int ret;
+ u8 v;
+
+ /* Enable the TCs available on PF to all VEBs */
+ tc_map = i40e_pf_get_tc_map(pf);
+ for (v = 0; v < I40E_MAX_VEB; v++) {
+ if (!pf->veb[v])
+ continue;
+ ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Failed configuring TC for VEB seid=%d\n",
+ pf->veb[v]->seid);
+ /* Will try to configure as many components */
+ }
+ }
+
+ /* Update each VSI */
+ for (v = 0; v < pf->num_alloc_vsi; v++) {
+ if (!pf->vsi[v])
+ continue;
+
+ /* - Enable all TCs for the LAN VSI
+ * - For all others keep them at TC0 for now
+ */
+ if (v == pf->lan_vsi)
+ tc_map = i40e_pf_get_tc_map(pf);
+ else
+ tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
+
+ ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Failed configuring TC for VSI seid=%d\n",
+ pf->vsi[v]->seid);
+ /* Will try to configure as many components */
+ } else {
+ /* Re-configure VSI vectors based on updated TC map */
+ i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
+ if (pf->vsi[v]->netdev)
+ i40e_dcbnl_set_all(pf->vsi[v]);
+ }
+ }
+ }
+
+ /**
+ * i40e_resume_port_tx - Resume port Tx
+ * @pf: PF struct
+ *
+ * Resume a port's Tx and issue a PF reset in case of failure to
+ * resume.
+ **/
+ static int i40e_resume_port_tx(struct i40e_pf *pf)
+ {
+ struct i40e_hw *hw = &pf->hw;
+ int ret;
+
+ ret = i40e_aq_resume_port_tx(hw, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Resume Port Tx failed, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ /* Schedule PF reset to recover */
+ set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+ i40e_service_event_schedule(pf);
+ }
+
+ return ret;
+ }
+
+ /**
+ * i40e_init_pf_dcb - Initialize DCB configuration
+ * @pf: PF being configured
+ *
+ * Query the current DCB configuration and cache it
+ * in the hardware structure
+ **/
+ static int i40e_init_pf_dcb(struct i40e_pf *pf)
+ {
+ struct i40e_hw *hw = &pf->hw;
+ int err = 0;
+
+ /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
+ if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT)
+ goto out;
+
+ /* Get the initial DCB configuration */
+ err = i40e_init_dcb(hw);
+ if (!err) {
+ /* Device/Function is not DCBX capable */
+ if ((!hw->func_caps.dcb) ||
+ (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
+ dev_info(&pf->pdev->dev,
+ "DCBX offload is not supported or is disabled for this PF.\n");
+ } else {
+ /* When status is not DISABLED then DCBX in FW */
+ pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
+ DCB_CAP_DCBX_VER_IEEE;
+
+ pf->flags |= I40E_FLAG_DCB_CAPABLE;
+ /* Enable DCB tagging only when more than one TC
+ * or explicitly disable if only one TC
+ */
+ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
+ pf->flags |= I40E_FLAG_DCB_ENABLED;
+ else
+ pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ dev_dbg(&pf->pdev->dev,
+ "DCBX offload is supported for this PF.\n");
+ }
+ } else {
+ dev_info(&pf->pdev->dev,
+ "Query for DCB configuration failed, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ }
+
+ out:
+ return err;
+ }
+ #endif /* CONFIG_I40E_DCB */
+ #define SPEED_SIZE 14
+ #define FC_SIZE 8
+ /**
+ * i40e_print_link_message - print link up or down
+ * @vsi: the VSI for which link needs a message
+ */
+ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+ {
+ enum i40e_aq_link_speed new_speed;
+ struct i40e_pf *pf = vsi->back;
+ char *speed = "Unknown";
+ char *fc = "Unknown";
+ char *fec = "";
+ char *req_fec = "";
+ char *an = "";
+
+ new_speed = pf->hw.phy.link_info.link_speed;
+
+ if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
+ return;
+ vsi->current_isup = isup;
+ vsi->current_speed = new_speed;
+ if (!isup) {
+ netdev_info(vsi->netdev, "NIC Link is Down\n");
+ return;
+ }
+
+ /* Warn user if link speed on NPAR enabled partition is not at
+ * least 10GB
+ */
+ if (pf->hw.func_caps.npar_enable &&
+ (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
+ pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
+ netdev_warn(vsi->netdev,
+ "The partition detected link speed that is less than 10Gbps\n");
+
+ switch (pf->hw.phy.link_info.link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ speed = "40 G";
+ break;
+ case I40E_LINK_SPEED_20GB:
+ speed = "20 G";
+ break;
+ case I40E_LINK_SPEED_25GB:
+ speed = "25 G";
+ break;
+ case I40E_LINK_SPEED_10GB:
+ speed = "10 G";
+ break;
+ case I40E_LINK_SPEED_1GB:
+ speed = "1000 M";
+ break;
+ case I40E_LINK_SPEED_100MB:
+ speed = "100 M";
+ break;
+ default:
+ break;
+ }
+
+ switch (pf->hw.fc.current_mode) {
+ case I40E_FC_FULL:
+ fc = "RX/TX";
+ break;
+ case I40E_FC_TX_PAUSE:
+ fc = "TX";
+ break;
+ case I40E_FC_RX_PAUSE:
+ fc = "RX";
+ break;
+ default:
+ fc = "None";
+ break;
+ }
+
+ if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
+ req_fec = ", Requested FEC: None";
+ fec = ", FEC: None";
+ an = ", Autoneg: False";
+
+ if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
+ an = ", Autoneg: True";
+
+ if (pf->hw.phy.link_info.fec_info &
+ I40E_AQ_CONFIG_FEC_KR_ENA)
+ fec = ", FEC: CL74 FC-FEC/BASE-R";
+ else if (pf->hw.phy.link_info.fec_info &
+ I40E_AQ_CONFIG_FEC_RS_ENA)
+ fec = ", FEC: CL108 RS-FEC";
+
+ /* 'CL108 RS-FEC' should be displayed when RS is requested, or
+ * both RS and FC are requested
+ */
+ if (vsi->back->hw.phy.link_info.req_fec_info &
+ (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
+ if (vsi->back->hw.phy.link_info.req_fec_info &
+ I40E_AQ_REQUEST_FEC_RS)
+ req_fec = ", Requested FEC: CL108 RS-FEC";
+ else
+ req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+ }
+ }
+
+ netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
+ speed, req_fec, fec, an, fc);
+ }
+
+ /**
+ * i40e_up_complete - Finish the last steps of bringing up a connection
+ * @vsi: the VSI being configured
+ **/
+ static int i40e_up_complete(struct i40e_vsi *vsi)
+ {
+ struct i40e_pf *pf = vsi->back;
+ int err;
+
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ i40e_vsi_configure_msix(vsi);
+ else
+ i40e_configure_msi_and_legacy(vsi);
+
+ /* start rings */
+ err = i40e_vsi_start_rings(vsi);
+ if (err)
+ return err;
+
+ clear_bit(__I40E_VSI_DOWN, vsi->state);
+ i40e_napi_enable_all(vsi);
+ i40e_vsi_enable_irq(vsi);
+
+ if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
+ (vsi->netdev)) {
+ i40e_print_link_message(vsi, true);
+ netif_tx_start_all_queues(vsi->netdev);
+ netif_carrier_on(vsi->netdev);
+ }
+
+ /* replay FDIR SB filters */
+ if (vsi->type == I40E_VSI_FDIR) {
+ /* reset fd counters */
+ pf->fd_add_err = 0;
+ pf->fd_atr_cnt = 0;
+ i40e_fdir_filter_restore(vsi);
+ }
+
+ /* On the next run of the service_task, notify any clients of the new
+ * opened netdev
+ */
+ pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+ i40e_service_event_schedule(pf);
+
+ return 0;
+ }
+
+ /**
+ * i40e_vsi_reinit_locked - Reset the VSI
+ * @vsi: the VSI being configured
+ *
+ * Rebuild the ring structs after some configuration
+ * has changed, e.g. MTU size.
+ **/
+ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+ {
+ struct i40e_pf *pf = vsi->back;
+
+ WARN_ON(in_interrupt());
+ while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
+ usleep_range(1000, 2000);
+ i40e_down(vsi);
+
+ i40e_up(vsi);
+ clear_bit(__I40E_CONFIG_BUSY, pf->state);
+ }
+
+ /**
+ * i40e_up - Bring the connection back up after being down
+ * @vsi: the VSI being configured
+ **/
+ int i40e_up(struct i40e_vsi *vsi)
+ {
+ int err;
+
+ err = i40e_vsi_configure(vsi);
+ if (!err)
+ err = i40e_up_complete(vsi);
+
+ return err;
+ }
+
+ /**
+ * i40e_down - Shutdown the connection processing
+ * @vsi: the VSI being stopped
+ **/
+ void i40e_down(struct i40e_vsi *vsi)
+ {
+ int i;
+
+ /* It is assumed that the caller of this function
+ * sets the vsi->state __I40E_VSI_DOWN bit.
+ */
+ if (vsi->netdev) {
+ netif_carrier_off(vsi->netdev);
+ netif_tx_disable(vsi->netdev);
+ }
+ i40e_vsi_disable_irq(vsi);
+ i40e_vsi_stop_rings(vsi);
+ i40e_napi_disable_all(vsi);
+
+ for (i = 0; i < vsi->num_queue_pairs; i++) {
+ i40e_clean_tx_ring(vsi->tx_rings[i]);
+ if (i40e_enabled_xdp_vsi(vsi))
+ i40e_clean_tx_ring(vsi->xdp_rings[i]);
+ i40e_clean_rx_ring(vsi->rx_rings[i]);
+ }
+
+ }
+
+ /**
+ * i40e_validate_mqprio_qopt- validate queue mapping info
+ * @vsi: the VSI being configured
+ * @mqprio_qopt: queue parametrs
+ **/
+ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+ {
+ u64 sum_max_rate = 0;
+ u64 max_rate = 0;
+ int i;
+
+ if (mqprio_qopt->qopt.offset[0] != 0 ||
+ mqprio_qopt->qopt.num_tc < 1 ||
+ mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS)
+ return -EINVAL;
+ for (i = 0; ; i++) {
+ if (!mqprio_qopt->qopt.count[i])
+ return -EINVAL;
+ if (mqprio_qopt->min_rate[i]) {
+ dev_err(&vsi->back->pdev->dev,
+ "Invalid min tx rate (greater than 0) specified\n");
+ return -EINVAL;
+ }
+ max_rate = mqprio_qopt->max_rate[i];
+ do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+ sum_max_rate += max_rate;
+
+ if (i >= mqprio_qopt->qopt.num_tc - 1)
+ break;
+ if (mqprio_qopt->qopt.offset[i + 1] !=
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+ return -EINVAL;
+ }
+ if (vsi->num_queue_pairs <
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+ return -EINVAL;
+ }
+ if (sum_max_rate > i40e_get_link_speed(vsi)) {
+ dev_err(&vsi->back->pdev->dev,
+ "Invalid max tx rate specified\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ /**
+ * i40e_vsi_set_default_tc_config - set default values for tc configuration
+ * @vsi: the VSI being configured
+ **/
+ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
+ {
+ u16 qcount;
+ int i;
+
+ /* Only TC0 is enabled */
+ vsi->tc_config.numtc = 1;
+ vsi->tc_config.enabled_tc = 1;
+ qcount = min_t(int, vsi->alloc_queue_pairs,
+ i40e_pf_get_max_q_per_tc(vsi->back));
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ /* For the TC that is not enabled set the offset to to default
+ * queue and allocate one queue for the given TC.
+ */
+ vsi->tc_config.tc_info[i].qoffset = 0;
+ if (i == 0)
+ vsi->tc_config.tc_info[i].qcount = qcount;
+ else
+ vsi->tc_config.tc_info[i].qcount = 1;
+ vsi->tc_config.tc_info[i].netdev_tc = 0;
+ }
+ }
+
+ /**
+ * i40e_setup_tc - configure multiple traffic classes
+ * @netdev: net device to configure
+ * @type_data: tc offload data
+ **/
+ static int i40e_setup_tc(struct net_device *netdev, void *type_data)
+ {
+ struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ u8 enabled_tc = 0, num_tc, hw;
+ bool need_reset = false;
+ int ret = -EINVAL;
+ u16 mode;
+ int i;
+
+ num_tc = mqprio_qopt->qopt.num_tc;
+ hw = mqprio_qopt->qopt.hw;
+ mode = mqprio_qopt->mode;
+ if (!hw) {
+ pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+ memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+ goto config_tc;
+ }
+
+ /* Check if MFP enabled */
+ if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ netdev_info(netdev,
+ "Configuring TC not supported in MFP mode\n");
+ return ret;
+ }
+ switch (mode) {
+ case TC_MQPRIO_MODE_DCB:
+ pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+
+ /* Check if DCB enabled to continue */
+ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+ netdev_info(netdev,
+ "DCB is not enabled for adapter\n");
+ return ret;
+ }
+
+ /* Check whether tc count is within enabled limit */
+ if (num_tc > i40e_pf_get_num_tc(pf)) {
+ netdev_info(netdev,
+ "TC count greater than enabled on link for adapter\n");
+ return ret;
+ }
+ break;
+ case TC_MQPRIO_MODE_CHANNEL:
+ if (pf->flags & I40E_FLAG_DCB_ENABLED) {
+ netdev_info(netdev,
+ "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
+ return ret;
+ }
+ if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+ return ret;
+ ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
+ if (ret)
+ return ret;
+ memcpy(&vsi->mqprio_qopt, mqprio_qopt,
+ sizeof(*mqprio_qopt));
+ pf->flags |= I40E_FLAG_TC_MQPRIO;
+ pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ config_tc:
+ /* Generate TC map for number of tc requested */
+ for (i = 0; i < num_tc; i++)
+ enabled_tc |= BIT(i);
+
+ /* Requesting same TC configuration as already enabled */
+ if (enabled_tc == vsi->tc_config.enabled_tc &&
+ mode != TC_MQPRIO_MODE_CHANNEL)
+ return 0;
+
+ /* Quiesce VSI queues */
+ i40e_quiesce_vsi(vsi);
+
+ if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO))
+ i40e_remove_queue_channels(vsi);
+
+ /* Configure VSI for enabled TCs */
+ ret = i40e_vsi_config_tc(vsi, enabled_tc);
+ if (ret) {
+ netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
+ vsi->seid);
+ need_reset = true;
+ goto exit;
+ }
+
+ if (pf->flags & I40E_FLAG_TC_MQPRIO) {
+ if (vsi->mqprio_qopt.max_rate[0]) {
+ u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+
+ do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+ ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+ if (!ret) {
+ u64 credits = max_tx_rate;
+
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ dev_dbg(&vsi->back->pdev->dev,
+ "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+ max_tx_rate,
+ credits,
+ vsi->seid);
+ } else {
+ need_reset = true;
+ goto exit;
+ }
+ }
+ ret = i40e_configure_queue_channels(vsi);
if (ret) {
- dev_info(&pf->pdev->dev,
- "Failed configuring TC for VEB seid=%d\n",
- pf->veb[v]->seid);
- /* Will try to configure as many components */
+ netdev_info(netdev,
+ "Failed configuring queue channels\n");
+ need_reset = true;
+ goto exit;
}
}
- /* Update each VSI */
- for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (!pf->vsi[v])
- continue;
+ exit:
+ /* Reset the configuration data to defaults, only TC0 is enabled */
+ if (need_reset) {
+ i40e_vsi_set_default_tc_config(vsi);
+ need_reset = false;
+ }
- /* - Enable all TCs for the LAN VSI
- * - For all others keep them at TC0 for now
- */
- if (v == pf->lan_vsi)
- tc_map = i40e_pf_get_tc_map(pf);
- else
- tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
+ /* Unquiesce VSI */
+ i40e_unquiesce_vsi(vsi);
+ return ret;
+ }
- ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Failed configuring TC for VSI seid=%d\n",
- pf->vsi[v]->seid);
- /* Will try to configure as many components */
- } else {
- /* Re-configure VSI vectors based on updated TC map */
- i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
- if (pf->vsi[v]->netdev)
- i40e_dcbnl_set_all(pf->vsi[v]);
+ /**
+ * i40e_set_cld_element - sets cloud filter element data
+ * @filter: cloud filter rule
+ * @cld: ptr to cloud filter element data
+ *
+ * This is helper function to copy data into cloud filter element
+ **/
+ static inline void
+ i40e_set_cld_element(struct i40e_cloud_filter *filter,
+ struct i40e_aqc_cloud_filters_element_data *cld)
+ {
+ int i, j;
+ u32 ipa;
+
+ memset(cld, 0, sizeof(*cld));
+ ether_addr_copy(cld->outer_mac, filter->dst_mac);
+ ether_addr_copy(cld->inner_mac, filter->src_mac);
+
+ if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6)
+ return;
+
+ if (filter->n_proto == ETH_P_IPV6) {
+ #define IPV6_MAX_INDEX (ARRAY_SIZE(filter->dst_ipv6) - 1)
+ for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6);
+ i++, j += 2) {
+ ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
+ ipa = cpu_to_le32(ipa);
+ memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa));
}
+ } else {
+ ipa = be32_to_cpu(filter->dst_ipv4);
+ memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
}
+
+ cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id));
+
+ /* tenant_id is not supported by FW now, once the support is enabled
+ * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id)
+ */
+ if (filter->tenant_id)
+ return;
}
/**
- * i40e_resume_port_tx - Resume port Tx
- * @pf: PF struct
+ * i40e_add_del_cloud_filter - Add/del cloud filter
+ * @vsi: pointer to VSI
+ * @filter: cloud filter rule
+ * @add: if true, add, if false, delete
*
- * Resume a port's Tx and issue a PF reset in case of failure to
- * resume.
+ * Add or delete a cloud filter for a specific flow spec.
+ * Returns 0 if the filter were successfully added.
**/
- static int i40e_resume_port_tx(struct i40e_pf *pf)
+ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter, bool add)
{
- struct i40e_hw *hw = &pf->hw;
+ struct i40e_aqc_cloud_filters_element_data cld_filter;
+ struct i40e_pf *pf = vsi->back;
int ret;
+ static const u16 flag_table[128] = {
+ [I40E_CLOUD_FILTER_FLAGS_OMAC] =
+ I40E_AQC_ADD_CLOUD_FILTER_OMAC,
+ [I40E_CLOUD_FILTER_FLAGS_IMAC] =
+ I40E_AQC_ADD_CLOUD_FILTER_IMAC,
+ [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN] =
+ I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
+ [I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
+ I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
+ [I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
+ I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
+ [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
+ I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
+ [I40E_CLOUD_FILTER_FLAGS_IIP] =
+ I40E_AQC_ADD_CLOUD_FILTER_IIP,
+ };
+
+ if (filter->flags >= ARRAY_SIZE(flag_table))
+ return I40E_ERR_CONFIG;
+
+ /* copy element needed to add cloud filter from filter */
+ i40e_set_cld_element(filter, &cld_filter);
+
+ if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
+ cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
+ I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
+
+ if (filter->n_proto == ETH_P_IPV6)
+ cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+ I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+ else
+ cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+ I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
- ret = i40e_aq_resume_port_tx(hw, NULL);
- if (ret) {
+ if (add)
+ ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
+ &cld_filter, 1);
+ else
+ ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
+ &cld_filter, 1);
+ if (ret)
+ dev_dbg(&pf->pdev->dev,
+ "Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
+ add ? "add" : "delete", filter->dst_port, ret,
+ pf->hw.aq.asq_last_status);
+ else
dev_info(&pf->pdev->dev,
- "Resume Port Tx failed, err %s aq_err %s\n",
- i40e_stat_str(&pf->hw, ret),
- i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
- /* Schedule PF reset to recover */
- set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
- i40e_service_event_schedule(pf);
- }
-
+ "%s cloud filter for VSI: %d\n",
+ add ? "Added" : "Deleted", filter->seid);
return ret;
}
/**
- * i40e_init_pf_dcb - Initialize DCB configuration
- * @pf: PF being configured
+ * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf
+ * @vsi: pointer to VSI
+ * @filter: cloud filter rule
+ * @add: if true, add, if false, delete
*
- * Query the current DCB configuration and cache it
- * in the hardware structure
+ * Add or delete a cloud filter for a specific flow spec using big buffer.
+ * Returns 0 if the filter were successfully added.
**/
- static int i40e_init_pf_dcb(struct i40e_pf *pf)
+ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add)
{
- struct i40e_hw *hw = &pf->hw;
- int err = 0;
+ struct i40e_aqc_cloud_filters_element_bb cld_filter;
+ struct i40e_pf *pf = vsi->back;
+ int ret;
- /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
- if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT)
- goto out;
+ /* Both (src/dst) valid mac_addr are not supported */
+ if ((is_valid_ether_addr(filter->dst_mac) &&
+ is_valid_ether_addr(filter->src_mac)) ||
+ (is_multicast_ether_addr(filter->dst_mac) &&
+ is_multicast_ether_addr(filter->src_mac)))
+ return -EINVAL;
- /* Get the initial DCB configuration */
- err = i40e_init_dcb(hw);
- if (!err) {
- /* Device/Function is not DCBX capable */
- if ((!hw->func_caps.dcb) ||
- (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
- dev_info(&pf->pdev->dev,
- "DCBX offload is not supported or is disabled for this PF.\n");
- } else {
- /* When status is not DISABLED then DCBX in FW */
- pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
- DCB_CAP_DCBX_VER_IEEE;
+ /* Make sure port is specified, otherwise bail out, for channel
+ * specific cloud filter needs 'L4 port' to be non-zero
+ */
+ if (!filter->dst_port)
+ return -EINVAL;
- pf->flags |= I40E_FLAG_DCB_CAPABLE;
- /* Enable DCB tagging only when more than one TC
- * or explicitly disable if only one TC
- */
- if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
- pf->flags |= I40E_FLAG_DCB_ENABLED;
- else
- pf->flags &= ~I40E_FLAG_DCB_ENABLED;
- dev_dbg(&pf->pdev->dev,
- "DCBX offload is supported for this PF.\n");
+ /* adding filter using src_port/src_ip is not supported at this stage */
+ if (filter->src_port || filter->src_ipv4 ||
+ !ipv6_addr_any(&filter->ip.v6.src_ip6))
+ return -EINVAL;
+
+ /* copy element needed to add cloud filter from filter */
+ i40e_set_cld_element(filter, &cld_filter.element);
+
+ if (is_valid_ether_addr(filter->dst_mac) ||
+ is_valid_ether_addr(filter->src_mac) ||
+ is_multicast_ether_addr(filter->dst_mac) ||
+ is_multicast_ether_addr(filter->src_mac)) {
+ /* MAC + IP : unsupported mode */
+ if (filter->dst_ipv4)
+ return -EINVAL;
+
+ /* since we validated that L4 port must be valid before
+ * we get here, start with respective "flags" value
+ * and update if vlan is present or not
+ */
+ cld_filter.element.flags =
+ cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT);
+
+ if (filter->vlan_id) {
+ cld_filter.element.flags =
+ cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
}
+
+ } else if (filter->dst_ipv4 ||
+ !ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
+ cld_filter.element.flags =
+ cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT);
+ if (filter->n_proto == ETH_P_IPV6)
+ cld_filter.element.flags |=
+ cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+ else
+ cld_filter.element.flags |=
+ cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
} else {
- dev_info(&pf->pdev->dev,
- "Query for DCB configuration failed, err %s aq_err %s\n",
- i40e_stat_str(&pf->hw, err),
- i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ dev_err(&pf->pdev->dev,
+ "either mac or ip has to be valid for cloud filter\n");
+ return -EINVAL;
}
- out:
- return err;
+ /* Now copy L4 port in Byte 6..7 in general fields */
+ cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
+ be16_to_cpu(filter->dst_port);
+
+ if (add) {
+ /* Validate current device switch mode, change if necessary */
+ ret = i40e_validate_and_set_switch_mode(vsi);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "failed to set switch mode, ret %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid,
+ &cld_filter, 1);
+ } else {
+ ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid,
+ &cld_filter, 1);
+ }
+
+ if (ret)
+ dev_dbg(&pf->pdev->dev,
+ "Failed to %s cloud filter(big buffer) err %d aq_err %d\n",
+ add ? "add" : "delete", ret, pf->hw.aq.asq_last_status);
+ else
+ dev_info(&pf->pdev->dev,
+ "%s cloud filter for VSI: %d, L4 port: %d\n",
+ add ? "add" : "delete", filter->seid,
+ ntohs(filter->dst_port));
+ return ret;
}
- #endif /* CONFIG_I40E_DCB */
- #define SPEED_SIZE 14
- #define FC_SIZE 8
+
/**
- * i40e_print_link_message - print link up or down
- * @vsi: the VSI for which link needs a message
- */
- void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+ * i40e_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ * @filter: Pointer to cloud filter structure
+ *
+ **/
+ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
+ struct tc_cls_flower_offload *f,
+ struct i40e_cloud_filter *filter)
{
- enum i40e_aq_link_speed new_speed;
- char *speed = "Unknown";
- char *fc = "Unknown";
- char *fec = "";
- char *req_fec = "";
- char *an = "";
+ u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
+ struct i40e_pf *pf = vsi->back;
+ u8 field_flags = 0;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+ dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n",
+ f->dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
- new_speed = vsi->back->hw.phy.link_info.link_speed;
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->key);
- if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
- return;
- vsi->current_isup = isup;
- vsi->current_speed = new_speed;
- if (!isup) {
- netdev_info(vsi->netdev, "NIC Link is Down\n");
- return;
+ struct flow_dissector_key_keyid *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+
+ if (mask->keyid != 0)
+ field_flags |= I40E_CLOUD_FIELD_TEN_ID;
+
+ filter->tenant_id = be32_to_cpu(key->keyid);
}
- /* Warn user if link speed on NPAR enabled partition is not at
- * least 10GB
- */
- if (vsi->back->hw.func_caps.npar_enable &&
- (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
- vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
- netdev_warn(vsi->netdev,
- "The partition detected link speed that is less than 10Gbps\n");
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
- switch (vsi->back->hw.phy.link_info.link_speed) {
- case I40E_LINK_SPEED_40GB:
- speed = "40 G";
- break;
- case I40E_LINK_SPEED_20GB:
- speed = "20 G";
- break;
- case I40E_LINK_SPEED_25GB:
- speed = "25 G";
- break;
- case I40E_LINK_SPEED_10GB:
- speed = "10 G";
- break;
- case I40E_LINK_SPEED_1GB:
- speed = "1000 M";
- break;
- case I40E_LINK_SPEED_100MB:
- speed = "100 M";
- break;
- default:
- break;
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+
+ n_proto_key = ntohs(key->n_proto);
+ n_proto_mask = ntohs(mask->n_proto);
+
+ if (n_proto_key == ETH_P_ALL) {
+ n_proto_key = 0;
+ n_proto_mask = 0;
+ }
+ filter->n_proto = n_proto_key & n_proto_mask;
+ filter->ip_proto = key->ip_proto;
}
- switch (vsi->back->hw.fc.current_mode) {
- case I40E_FC_FULL:
- fc = "RX/TX";
- break;
- case I40E_FC_TX_PAUSE:
- fc = "TX";
- break;
- case I40E_FC_RX_PAUSE:
- fc = "RX";
- break;
- default:
- fc = "None";
- break;
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+
+ struct flow_dissector_key_eth_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+
+ /* use is_broadcast and is_zero to check for all 0xf or 0 */
+ if (!is_zero_ether_addr(mask->dst)) {
+ if (is_broadcast_ether_addr(mask->dst)) {
+ field_flags |= I40E_CLOUD_FIELD_OMAC;
+ } else {
+ dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
+ mask->dst);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (!is_zero_ether_addr(mask->src)) {
+ if (is_broadcast_ether_addr(mask->src)) {
+ field_flags |= I40E_CLOUD_FIELD_IMAC;
+ } else {
+ dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
+ mask->src);
+ return I40E_ERR_CONFIG;
+ }
+ }
+ ether_addr_copy(filter->dst_mac, key->dst);
+ ether_addr_copy(filter->src_mac, key->src);
}
- if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
- req_fec = ", Requested FEC: None";
- fec = ", FEC: None";
- an = ", Autoneg: False";
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
- if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
- an = ", Autoneg: True";
+ if (mask->vlan_id) {
+ if (mask->vlan_id == VLAN_VID_MASK) {
+ field_flags |= I40E_CLOUD_FIELD_IVLAN;
- if (vsi->back->hw.phy.link_info.fec_info &
- I40E_AQ_CONFIG_FEC_KR_ENA)
- fec = ", FEC: CL74 FC-FEC/BASE-R";
- else if (vsi->back->hw.phy.link_info.fec_info &
- I40E_AQ_CONFIG_FEC_RS_ENA)
- fec = ", FEC: CL108 RS-FEC";
+ } else {
+ dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
+ mask->vlan_id);
+ return I40E_ERR_CONFIG;
+ }
+ }
- /* 'CL108 RS-FEC' should be displayed when RS is requested, or
- * both RS and FC are requested
+ filter->vlan_id = cpu_to_be16(key->vlan_id);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->key);
+
+ addr_type = key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->mask);
+
+ if (mask->dst) {
+ if (mask->dst == cpu_to_be32(0xffffffff)) {
+ field_flags |= I40E_CLOUD_FIELD_IIP;
+ } else {
+ mask->dst = be32_to_cpu(mask->dst);
+ dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n",
+ &mask->dst);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask->src) {
+ if (mask->src == cpu_to_be32(0xffffffff)) {
+ field_flags |= I40E_CLOUD_FIELD_IIP;
+ } else {
+ mask->src = be32_to_cpu(mask->src);
+ dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n",
+ &mask->src);
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (field_flags & I40E_CLOUD_FIELD_TEN_ID) {
+ dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
+ return I40E_ERR_CONFIG;
+ }
+ filter->dst_ipv4 = key->dst;
+ filter->src_ipv4 = key->src;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->mask);
+
+ /* src and dest IPV6 address should not be LOOPBACK
+ * (0:0:0:0:0:0:0:1), which can be represented as ::1
*/
- if (vsi->back->hw.phy.link_info.req_fec_info &
- (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
- if (vsi->back->hw.phy.link_info.req_fec_info &
- I40E_AQ_REQUEST_FEC_RS)
- req_fec = ", Requested FEC: CL108 RS-FEC";
- else
- req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+ if (ipv6_addr_loopback(&key->dst) ||
+ ipv6_addr_loopback(&key->src)) {
+ dev_err(&pf->pdev->dev,
+ "Bad ipv6, addr is LOOPBACK\n");
+ return I40E_ERR_CONFIG;
+ }
+ if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+ field_flags |= I40E_CLOUD_FIELD_IIP;
+
+ memcpy(&filter->src_ipv6, &key->src.s6_addr32,
+ sizeof(filter->src_ipv6));
+ memcpy(&filter->dst_ipv6, &key->dst.s6_addr32,
+ sizeof(filter->dst_ipv6));
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->mask);
+
+ if (mask->src) {
+ if (mask->src == cpu_to_be16(0xffff)) {
+ field_flags |= I40E_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
+ be16_to_cpu(mask->src));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ if (mask->dst) {
+ if (mask->dst == cpu_to_be16(0xffff)) {
+ field_flags |= I40E_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
+ be16_to_cpu(mask->dst));
+ return I40E_ERR_CONFIG;
+ }
+ }
+
+ filter->dst_port = key->dst;
+ filter->src_port = key->src;
+
+ switch (filter->ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ dev_err(&pf->pdev->dev,
+ "Only UDP and TCP transport are supported\n");
+ return -EINVAL;
}
}
+ filter->flags = field_flags;
+ return 0;
+ }
- netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
- speed, req_fec, fec, an, fc);
+ /**
+ * i40e_handle_tclass: Forward to a traffic class on the device
+ * @vsi: Pointer to VSI
+ * @tc: traffic class index on the device
+ * @filter: Pointer to cloud filter structure
+ *
+ **/
+ static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc,
+ struct i40e_cloud_filter *filter)
+ {
+ struct i40e_channel *ch, *ch_tmp;
+
+ /* direct to a traffic class on the same device */
+ if (tc == 0) {
+ filter->seid = vsi->seid;
+ return 0;
+ } else if (vsi->tc_config.enabled_tc & BIT(tc)) {
+ if (!filter->dst_port) {
+ dev_err(&vsi->back->pdev->dev,
+ "Specify destination port to direct to traffic class that is not default\n");
+ return -EINVAL;
+ }
+ if (list_empty(&vsi->ch_list))
+ return -EINVAL;
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list,
+ list) {
+ if (ch->seid == vsi->tc_seid_map[tc])
+ filter->seid = ch->seid;
+ }
+ return 0;
+ }
+ dev_err(&vsi->back->pdev->dev, "TC is not enabled\n");
+ return -EINVAL;
}
/**
- * i40e_up_complete - Finish the last steps of bringing up a connection
- * @vsi: the VSI being configured
+ * i40e_configure_clsflower - Configure tc flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ *
**/
- static int i40e_up_complete(struct i40e_vsi *vsi)
+ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
+ struct tc_cls_flower_offload *cls_flower)
{
+ int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
+ struct i40e_cloud_filter *filter = NULL;
struct i40e_pf *pf = vsi->back;
- int err;
+ int err = 0;
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
- i40e_vsi_configure_msix(vsi);
- else
- i40e_configure_msi_and_legacy(vsi);
+ if (tc < 0) {
+ dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
+ return -EINVAL;
+ }
- /* start rings */
- err = i40e_vsi_start_rings(vsi);
- if (err)
- return err;
+ if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+ test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+ return -EBUSY;
- clear_bit(__I40E_VSI_DOWN, vsi->state);
- i40e_napi_enable_all(vsi);
- i40e_vsi_enable_irq(vsi);
+ if (pf->fdir_pf_active_filters ||
+ (!hlist_empty(&pf->fdir_filter_list))) {
+ dev_err(&vsi->back->pdev->dev,
+ "Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n");
+ return -EINVAL;
+ }
- if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
- (vsi->netdev)) {
- i40e_print_link_message(vsi, true);
- netif_tx_start_all_queues(vsi->netdev);
- netif_carrier_on(vsi->netdev);
- } else if (vsi->netdev) {
- i40e_print_link_message(vsi, false);
- /* need to check for qualified module here*/
- if ((pf->hw.phy.link_info.link_info &
- I40E_AQ_MEDIA_AVAILABLE) &&
- (!(pf->hw.phy.link_info.an_info &
- I40E_AQ_QUALIFIED_MODULE)))
- netdev_err(vsi->netdev,
- "the driver failed to link because an unqualified module was detected.");
+ if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) {
+ dev_err(&vsi->back->pdev->dev,
+ "Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
+ vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+ vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
}
- /* replay FDIR SB filters */
- if (vsi->type == I40E_VSI_FDIR) {
- /* reset fd counters */
- pf->fd_add_err = 0;
- pf->fd_atr_cnt = 0;
- i40e_fdir_filter_restore(vsi);
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return -ENOMEM;
+
+ filter->cookie = cls_flower->cookie;
+
+ err = i40e_parse_cls_flower(vsi, cls_flower, filter);
+ if (err < 0)
+ goto err;
+
+ err = i40e_handle_tclass(vsi, tc, filter);
+ if (err < 0)
+ goto err;
+
+ /* Add cloud filter */
+ if (filter->dst_port)
+ err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true);
+ else
+ err = i40e_add_del_cloud_filter(vsi, filter, true);
+
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "Failed to add cloud filter, err %s\n",
+ i40e_stat_str(&pf->hw, err));
+ err = i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
+ goto err;
}
- /* On the next run of the service_task, notify any clients of the new
- * opened netdev
- */
- pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
- i40e_service_event_schedule(pf);
+ /* add filter to the ordered list */
+ INIT_HLIST_NODE(&filter->cloud_node);
- return 0;
+ hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list);
+
+ pf->num_cloud_filters++;
+
+ return err;
+ err:
+ kfree(filter);
+ return err;
}
/**
- * i40e_vsi_reinit_locked - Reset the VSI
- * @vsi: the VSI being configured
+ * i40e_find_cloud_filter - Find the could filter in the list
+ * @vsi: Pointer to VSI
+ * @cookie: filter specific cookie
*
- * Rebuild the ring structs after some configuration
- * has changed, e.g. MTU size.
**/
- static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+ static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi,
+ unsigned long *cookie)
{
- struct i40e_pf *pf = vsi->back;
-
- WARN_ON(in_interrupt());
- while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
- usleep_range(1000, 2000);
- i40e_down(vsi);
+ struct i40e_cloud_filter *filter = NULL;
+ struct hlist_node *node2;
- i40e_up(vsi);
- clear_bit(__I40E_CONFIG_BUSY, pf->state);
+ hlist_for_each_entry_safe(filter, node2,
+ &vsi->back->cloud_filter_list, cloud_node)
+ if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+ return filter;
+ return NULL;
}
/**
- * i40e_up - Bring the connection back up after being down
- * @vsi: the VSI being configured
+ * i40e_delete_clsflower - Remove tc flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ *
**/
- int i40e_up(struct i40e_vsi *vsi)
+ static int i40e_delete_clsflower(struct i40e_vsi *vsi,
+ struct tc_cls_flower_offload *cls_flower)
{
- int err;
+ struct i40e_cloud_filter *filter = NULL;
+ struct i40e_pf *pf = vsi->back;
+ int err = 0;
- err = i40e_vsi_configure(vsi);
- if (!err)
- err = i40e_up_complete(vsi);
+ filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie);
- return err;
- }
+ if (!filter)
+ return -EINVAL;
- /**
- * i40e_down - Shutdown the connection processing
- * @vsi: the VSI being stopped
- **/
- void i40e_down(struct i40e_vsi *vsi)
- {
- int i;
+ hash_del(&filter->cloud_node);
- /* It is assumed that the caller of this function
- * sets the vsi->state __I40E_VSI_DOWN bit.
- */
- if (vsi->netdev) {
- netif_carrier_off(vsi->netdev);
- netif_tx_disable(vsi->netdev);
- }
- i40e_vsi_disable_irq(vsi);
- i40e_vsi_stop_rings(vsi);
- i40e_napi_disable_all(vsi);
+ if (filter->dst_port)
+ err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false);
+ else
+ err = i40e_add_del_cloud_filter(vsi, filter, false);
- for (i = 0; i < vsi->num_queue_pairs; i++) {
- i40e_clean_tx_ring(vsi->tx_rings[i]);
- if (i40e_enabled_xdp_vsi(vsi))
- i40e_clean_tx_ring(vsi->xdp_rings[i]);
- i40e_clean_rx_ring(vsi->rx_rings[i]);
+ kfree(filter);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "Failed to delete cloud filter, err %s\n",
+ i40e_stat_str(&pf->hw, err));
+ return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
}
+ pf->num_cloud_filters--;
+ if (!pf->num_cloud_filters)
+ if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+ !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+ pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+ pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+ pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+ }
+ return 0;
}
/**
- * i40e_setup_tc - configure multiple traffic classes
+ * i40e_setup_tc_cls_flower - flower classifier offloads
* @netdev: net device to configure
- * @tc: number of traffic classes to enable
+ * @type_data: offload data
**/
- static int i40e_setup_tc(struct net_device *netdev, u8 tc)
+ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
+ struct tc_cls_flower_offload *cls_flower)
{
- struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
- struct i40e_pf *pf = vsi->back;
- u8 enabled_tc = 0;
- int ret = -EINVAL;
- int i;
-
- /* Check if DCB enabled to continue */
- if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
- netdev_info(netdev, "DCB is not enabled for adapter\n");
- goto exit;
- }
- /* Check if MFP enabled */
- if (pf->flags & I40E_FLAG_MFP_ENABLED) {
- netdev_info(netdev, "Configuring TC not supported in MFP mode\n");
- goto exit;
- }
+ if (cls_flower->common.chain_index)
+ return -EOPNOTSUPP;
- /* Check whether tc count is within enabled limit */
- if (tc > i40e_pf_get_num_tc(pf)) {
- netdev_info(netdev, "TC count greater than enabled on link for adapter\n");
- goto exit;
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return i40e_configure_clsflower(vsi, cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return i40e_delete_clsflower(vsi, cls_flower);
+ case TC_CLSFLOWER_STATS:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
}
+ }
- /* Generate TC map for number of tc requested */
- for (i = 0; i < tc; i++)
- enabled_tc |= BIT(i);
-
- /* Requesting same TC configuration as already enabled */
- if (enabled_tc == vsi->tc_config.enabled_tc)
- return 0;
+ static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+ {
+ struct i40e_netdev_priv *np = cb_priv;
- /* Quiesce VSI queues */
- i40e_quiesce_vsi(vsi);
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return i40e_setup_tc_cls_flower(np, type_data);
- /* Configure VSI for enabled TCs */
- ret = i40e_vsi_config_tc(vsi, enabled_tc);
- if (ret) {
- netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
- vsi->seid);
- goto exit;
+ default:
+ return -EOPNOTSUPP;
}
+ }
- /* Unquiesce VSI */
- i40e_unquiesce_vsi(vsi);
+ static int i40e_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+ {
+ struct i40e_netdev_priv *np = netdev_priv(dev);
- exit:
- return ret;
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb,
+ np, np);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
}
static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
void *type_data)
{
- struct tc_mqprio_qopt *mqprio = type_data;
-
- if (type != TC_SETUP_MQPRIO)
+ switch (type) {
+ case TC_SETUP_QDISC_MQPRIO:
+ return i40e_setup_tc(netdev, type_data);
+ case TC_SETUP_BLOCK:
+ return i40e_setup_tc_block(netdev, type_data);
+ default:
return -EOPNOTSUPP;
-
- mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
-
- return i40e_setup_tc(netdev, mqprio->num_tc);
+ }
}
/**
err_setup_tx:
i40e_vsi_free_tx_resources(vsi);
if (vsi == pf->vsi[pf->lan_vsi])
- i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
return err;
}
I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
}
+ /**
+ * i40e_cloud_filter_exit - Cleans up the cloud filters
+ * @pf: Pointer to PF
+ *
+ * This function destroys the hlist where all the cloud filters
+ * were saved.
+ **/
+ static void i40e_cloud_filter_exit(struct i40e_pf *pf)
+ {
+ struct i40e_cloud_filter *cfilter;
+ struct hlist_node *node;
+
+ hlist_for_each_entry_safe(cfilter, node,
+ &pf->cloud_filter_list, cloud_node) {
+ hlist_del(&cfilter->cloud_node);
+ kfree(cfilter);
+ }
+ pf->num_cloud_filters = 0;
+
+ if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+ !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+ pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+ pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+ pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+ }
+ }
+
/**
* i40e_close - Disables a network interface
* @netdev: network interface device structure
wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
i40e_flush(&pf->hw);
- } else if (reset_flags & BIT_ULL(__I40E_PF_RESET_REQUESTED)) {
+ } else if (reset_flags & I40E_PF_RESET_FLAG) {
/* Request a PF Reset
*
hlist_del(&filter->fdir_node);
kfree(filter);
pf->fdir_pf_active_filters--;
+ pf->fd_inv = 0;
}
}
}
new_link == netif_carrier_ok(vsi->netdev)))
return;
- if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- i40e_print_link_message(vsi, new_link);
+ i40e_print_link_message(vsi, new_link);
/* Notify the base of the switch tree connected to
* the link. Floating VEBs are not notified.
*/
i40e_link_event(pf);
- /* check for unqualified module, if link is down */
- if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
- (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
- (!(status->link_info & I40E_AQ_LINK_UP)))
+ /* Check if module meets thermal requirements */
+ if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
dev_err(&pf->pdev->dev,
- "The driver failed to link because an unqualified module was detected.\n");
+ "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
+ dev_err(&pf->pdev->dev,
+ "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+ } else {
+ /* check for unqualified module, if link is down, suppress
+ * the message if link was forced to be down.
+ */
+ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+ (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
+ (!(status->link_info & I40E_AQ_LINK_UP)) &&
+ (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
+ dev_err(&pf->pdev->dev,
+ "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
+ dev_err(&pf->pdev->dev,
+ "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+ }
+ }
}
/**
* i40e_get_capabilities - get info about the HW
* @pf: the PF struct
**/
- static int i40e_get_capabilities(struct i40e_pf *pf)
+ static int i40e_get_capabilities(struct i40e_pf *pf,
+ enum i40e_admin_queue_opc list_type)
{
struct i40e_aqc_list_capabilities_element_resp *cap_buf;
u16 data_size;
/* this loads the data into the hw struct for us */
err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
- &data_size,
- i40e_aqc_opc_list_func_capabilities,
- NULL);
+ &data_size, list_type,
+ NULL);
/* data loaded, buffer no longer needed */
kfree(cap_buf);
}
} while (err);
- if (pf->hw.debug_mask & I40E_DEBUG_USER)
- dev_info(&pf->pdev->dev,
- "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
- pf->hw.pf_id, pf->hw.func_caps.num_vfs,
- pf->hw.func_caps.num_msix_vectors,
- pf->hw.func_caps.num_msix_vectors_vf,
- pf->hw.func_caps.fd_filters_guaranteed,
- pf->hw.func_caps.fd_filters_best_effort,
- pf->hw.func_caps.num_tx_qp,
- pf->hw.func_caps.num_vsis);
-
+ if (pf->hw.debug_mask & I40E_DEBUG_USER) {
+ if (list_type == i40e_aqc_opc_list_func_capabilities) {
+ dev_info(&pf->pdev->dev,
+ "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
+ pf->hw.pf_id, pf->hw.func_caps.num_vfs,
+ pf->hw.func_caps.num_msix_vectors,
+ pf->hw.func_caps.num_msix_vectors_vf,
+ pf->hw.func_caps.fd_filters_guaranteed,
+ pf->hw.func_caps.fd_filters_best_effort,
+ pf->hw.func_caps.num_tx_qp,
+ pf->hw.func_caps.num_vsis);
+ } else if (list_type == i40e_aqc_opc_list_dev_capabilities) {
+ dev_info(&pf->pdev->dev,
+ "switch_mode=0x%04x, function_valid=0x%08x\n",
+ pf->hw.dev_caps.switch_mode,
+ pf->hw.dev_caps.valid_functions);
+ dev_info(&pf->pdev->dev,
+ "SR-IOV=%d, num_vfs for all function=%u\n",
+ pf->hw.dev_caps.sr_iov_1_1,
+ pf->hw.dev_caps.num_vfs);
+ dev_info(&pf->pdev->dev,
+ "num_vsis=%u, num_rx:%u, num_tx=%u\n",
+ pf->hw.dev_caps.num_vsis,
+ pf->hw.dev_caps.num_rx_qp,
+ pf->hw.dev_caps.num_tx_qp);
+ }
+ }
+ if (list_type == i40e_aqc_opc_list_func_capabilities) {
#define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \
+ pf->hw.func_caps.num_vfs)
- if (pf->hw.revision_id == 0 && (DEF_NUM_VSI > pf->hw.func_caps.num_vsis)) {
- dev_info(&pf->pdev->dev,
- "got num_vsis %d, setting num_vsis to %d\n",
- pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
- pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+ if (pf->hw.revision_id == 0 &&
+ pf->hw.func_caps.num_vsis < DEF_NUM_VSI) {
+ dev_info(&pf->pdev->dev,
+ "got num_vsis %d, setting num_vsis to %d\n",
+ pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
+ pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+ }
}
-
return 0;
}
if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
return;
- /* find existing VSI and see if it needs configuring */
- vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+ /* find existing VSI and see if it needs configuring */
+ vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+
+ /* create a new VSI if none exists */
+ if (!vsi) {
+ vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
+ pf->vsi[pf->lan_vsi]->seid, 0);
+ if (!vsi) {
+ dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
+ pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+ pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+ return;
+ }
+ }
+
+ i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
+ }
+
+ /**
+ * i40e_fdir_teardown - release the Flow Director resources
+ * @pf: board private structure
+ **/
+ static void i40e_fdir_teardown(struct i40e_pf *pf)
+ {
+ struct i40e_vsi *vsi;
+
+ i40e_fdir_filter_exit(pf);
+ vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+ if (vsi)
+ i40e_vsi_release(vsi);
+ }
+
+ /**
+ * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs
+ * @vsi: PF main vsi
+ * @seid: seid of main or channel VSIs
+ *
+ * Rebuilds cloud filters associated with main VSI and channel VSIs if they
+ * existed before reset
+ **/
+ static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
+ {
+ struct i40e_cloud_filter *cfilter;
+ struct i40e_pf *pf = vsi->back;
+ struct hlist_node *node;
+ i40e_status ret;
+
+ /* Add cloud filters back if they exist */
+ hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
+ cloud_node) {
+ if (cfilter->seid != seid)
+ continue;
+
+ if (cfilter->dst_port)
+ ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+ true);
+ else
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
- /* create a new VSI if none exists */
- if (!vsi) {
- vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
- pf->vsi[pf->lan_vsi]->seid, 0);
- if (!vsi) {
- dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
- pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
- return;
+ if (ret) {
+ dev_dbg(&pf->pdev->dev,
+ "Failed to rebuild cloud filter, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return ret;
}
}
-
- i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
+ return 0;
}
/**
- * i40e_fdir_teardown - release the Flow Director resources
- * @pf: board private structure
+ * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
+ * @vsi: PF main vsi
+ *
+ * Rebuilds channel VSIs if they existed before reset
**/
- static void i40e_fdir_teardown(struct i40e_pf *pf)
+ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
{
- struct i40e_vsi *vsi;
+ struct i40e_channel *ch, *ch_tmp;
+ i40e_status ret;
- i40e_fdir_filter_exit(pf);
- vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
- if (vsi)
- i40e_vsi_release(vsi);
+ if (list_empty(&vsi->ch_list))
+ return 0;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ if (!ch->initialized)
+ break;
+ /* Proceed with creation of channel (VMDq2) VSI */
+ ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "failed to rebuild channels using uplink_seid %u\n",
+ vsi->uplink_seid);
+ return ret;
+ }
+ if (ch->max_tx_rate) {
+ u64 credits = ch->max_tx_rate;
+
+ if (i40e_set_bw_limit(vsi, ch->seid,
+ ch->max_tx_rate))
+ return -EINVAL;
+
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ dev_dbg(&vsi->back->pdev->dev,
+ "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+ ch->max_tx_rate,
+ credits,
+ ch->seid);
+ }
+ ret = i40e_rebuild_cloud_filters(vsi, ch->seid);
+ if (ret) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "Failed to rebuild cloud filters for channel VSI %u\n",
+ ch->seid);
+ return ret;
+ }
+ }
+ return 0;
}
/**
**/
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
struct i40e_hw *hw = &pf->hw;
u8 set_fc_aq_fail = 0;
i40e_status ret;
i40e_verify_eeprom(pf);
i40e_clear_pxe_mode(hw);
- ret = i40e_get_capabilities(pf);
+ ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
if (ret)
goto end_core_reset;
* If there were VEBs but the reconstitution failed, we'll try
* try to recover minimal use by getting the basic PF VSI working.
*/
- if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) {
+ if (vsi->uplink_seid != pf->mac_seid) {
dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
/* find the one VEB connected to the MAC, and find orphans */
for (v = 0; v < I40E_MAX_VEB; v++) {
dev_info(&pf->pdev->dev,
"rebuild of switch failed: %d, will try to set up simple PF connection\n",
ret);
- pf->vsi[pf->lan_vsi]->uplink_seid
- = pf->mac_seid;
+ vsi->uplink_seid = pf->mac_seid;
break;
} else if (pf->veb[v]->uplink_seid == 0) {
dev_info(&pf->pdev->dev,
}
}
- if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) {
+ if (vsi->uplink_seid == pf->mac_seid) {
dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
/* no VEB, so rebuild only the Main VSI */
- ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]);
+ ret = i40e_add_vsi(vsi);
if (ret) {
dev_info(&pf->pdev->dev,
"rebuild of Main VSI failed: %d\n", ret);
}
}
+ if (vsi->mqprio_qopt.max_rate[0]) {
+ u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+ u64 credits = 0;
+
+ do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+ ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+ if (ret)
+ goto end_unlock;
+
+ credits = max_tx_rate;
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ dev_dbg(&vsi->back->pdev->dev,
+ "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+ max_tx_rate,
+ credits,
+ vsi->seid);
+ }
+
+ ret = i40e_rebuild_cloud_filters(vsi, vsi->seid);
+ if (ret)
+ goto end_unlock;
+
+ /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs
+ * for this main VSI if they exist
+ */
+ ret = i40e_rebuild_channels(vsi);
+ if (ret)
+ goto end_unlock;
+
/* Reconfigure hardware for allowing smaller MSS in the case
* of TSO, so that we avoid the MDD being fired and causing
* a reset in the case of small MSS+TSO.
* i40e_service_timer - timer callback
* @data: pointer to PF struct
**/
- static void i40e_service_timer(unsigned long data)
+ static void i40e_service_timer(struct timer_list *t)
{
- struct i40e_pf *pf = (struct i40e_pf *)data;
+ struct i40e_pf *pf = from_timer(pf, t, service_timer);
mod_timer(&pf->service_timer,
round_jiffies(jiffies + pf->service_timer_period));
/**
* i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
- * @type: VSI pointer
+ * @vsi: VSI pointer
* @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
*
* On error: returns error code (negative)
pf->num_lan_qps = 1;
pf->num_lan_msix = 1;
- } else if (!vectors_left) {
+ } else if (v_actual != v_budget) {
/* If we have limited resources, we will start with no vectors
* for the special features and then allocate vectors to some
* of these features based on the policy and at the end disable
int vec;
dev_info(&pf->pdev->dev,
- "MSI-X vector limit reached, attempting to redistribute vectors\n");
+ "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n",
+ v_actual, v_budget);
/* reserve the misc vector */
vec = v_actual - 1;
(pf->num_fdsb_msix == 0)) {
dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+ pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
}
if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
(pf->num_vmdq_msix == 0)) {
I40E_FLAG_FD_SB_ENABLED |
I40E_FLAG_FD_ATR_ENABLED |
I40E_FLAG_VMDQ_ENABLED);
+ pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
/* rework the queue expectations without MSIX */
i40e_determine_queue_usage(pf);
return 0;
}
+ /**
+ * i40e_restore_interrupt_scheme - Restore the interrupt scheme
+ * @pf: private board data structure
+ *
+ * Restore the interrupt scheme that was cleared when we suspended the
+ * device. This should be called during resume to re-allocate the q_vectors
+ * and reacquire IRQs.
+ */
+ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
+ {
+ int err, i;
+
+ /* We cleared the MSI and MSI-X flags when disabling the old interrupt
+ * scheme. We need to re-enabled them here in order to attempt to
+ * re-acquire the MSI or MSI-X vectors
+ */
+ pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+
+ err = i40e_init_interrupt_scheme(pf);
+ if (err)
+ return err;
+
+ /* Now that we've re-acquired IRQs, we need to remap the vectors and
+ * rings together again.
+ */
+ for (i = 0; i < pf->num_alloc_vsi; i++) {
+ if (pf->vsi[i]) {
+ err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
+ if (err)
+ goto err_unwind;
+ i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
+ }
+ }
+
+ err = i40e_setup_misc_vector(pf);
+ if (err)
+ goto err_unwind;
+
+ return 0;
+
+ err_unwind:
+ while (i--) {
+ if (pf->vsi[i])
+ i40e_vsi_free_q_vectors(pf->vsi[i]);
+ }
+
+ return err;
+ }
+
/**
* i40e_setup_misc_vector - Setup the misc vector to handle non queue events
* @pf: board private structure
struct i40e_hw *hw = &pf->hw;
int err = 0;
- /* Only request the irq if this is the first time through, and
- * not when we're rebuilding after a Reset
- */
- if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) {
+ /* Only request the IRQ once, the first time through. */
+ if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
err = request_irq(pf->msix_entries[0].vector,
i40e_intr, 0, pf->int_name, pf);
if (err) {
+ clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
dev_info(&pf->pdev->dev,
"request_irq for %s failed: %d\n",
pf->int_name, err);
i40e_flush(hw);
- i40e_irq_dynamic_enable_icr0(pf, true);
+ i40e_irq_dynamic_enable_icr0(pf);
return err;
}
- /**
- * i40e_config_rss_aq - Prepare for RSS using AQ commands
- * @vsi: vsi structure
- * @seed: RSS hash seed
- **/
- static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
- u8 *lut, u16 lut_size)
- {
- struct i40e_pf *pf = vsi->back;
- struct i40e_hw *hw = &pf->hw;
- int ret = 0;
-
- if (seed) {
- struct i40e_aqc_get_set_rss_key_data *seed_dw =
- (struct i40e_aqc_get_set_rss_key_data *)seed;
- ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Cannot set RSS key, err %s aq_err %s\n",
- i40e_stat_str(hw, ret),
- i40e_aq_str(hw, hw->aq.asq_last_status));
- return ret;
- }
- }
- if (lut) {
- bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
-
- ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Cannot set RSS lut, err %s aq_err %s\n",
- i40e_stat_str(hw, ret),
- i40e_aq_str(hw, hw->aq.asq_last_status));
- return ret;
- }
- }
- return ret;
- }
-
/**
* i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
* @vsi: Pointer to vsi structure
return ret;
}
- /**
- * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
- * @vsi: VSI structure
- **/
- static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
- {
- u8 seed[I40E_HKEY_ARRAY_SIZE];
- struct i40e_pf *pf = vsi->back;
- u8 *lut;
- int ret;
-
- if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
- return 0;
-
- if (!vsi->rss_size)
- vsi->rss_size = min_t(int, pf->alloc_rss_size,
- vsi->num_queue_pairs);
- if (!vsi->rss_size)
- return -EINVAL;
-
- lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
- if (!lut)
- return -ENOMEM;
- /* Use the user configured hash keys and lookup table if there is one,
- * otherwise use default
- */
- if (vsi->rss_lut_user)
- memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
- else
- i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
- if (vsi->rss_hkey_user)
- memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
- else
- netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
- ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
- kfree(lut);
-
- return ret;
- }
-
/**
* i40e_config_rss_reg - Configure RSS keys and lut by writing registers
* @vsi: Pointer to vsi structure
I40E_FLAG_MSIX_ENABLED;
/* Set default ITR */
- pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
- pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF;
+ pf->rx_itr_default = I40E_ITR_RX_DEF;
+ pf->tx_itr_default = I40E_ITR_TX_DEF;
/* Depending on PF configurations, it is possible that the RSS
* maximum might end up larger than the available queues
(pf->hw.aq.fw_maj_ver >= 5)))
pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
+ /* Enable PTP L4 if FW > v6.0 */
+ if (pf->hw.mac.type == I40E_MAC_XL710 &&
+ pf->hw.aq.fw_maj_ver >= 6)
+ pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
+
if (pf->hw.func_caps.vmdq) {
pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
pf->flags |= I40E_FLAG_VMDQ_ENABLED;
/* Enable filters and mark for reset */
if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
need_reset = true;
- /* enable FD_SB only if there is MSI-X vector */
- if (pf->num_fdsb_msix > 0)
+ /* enable FD_SB only if there is MSI-X vector and no cloud
+ * filters exist
+ */
+ if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+ pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+ }
} else {
/* turn off filters, mark for reset and clear SW filter list */
if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
}
pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED |
I40E_FLAG_FD_SB_AUTO_DISABLED);
+ pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+
/* reset fd counters */
pf->fd_add_err = 0;
pf->fd_atr_cnt = 0;
else
i40e_vlan_stripping_disable(vsi);
+ if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
+ dev_err(&pf->pdev->dev,
+ "Offloaded tc filters active, can't turn hw_tc_offload off");
+ return -EINVAL;
+ }
+
need_reset = i40e_set_ntuple(pf, features);
if (need_reset)
- i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
return 0;
}
pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
else
pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
- i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED),
- true);
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
break;
}
}
}
/**
- * i40e_xdp - implements ndo_xdp for i40e
+ * i40e_xdp - implements ndo_bpf for i40e
* @dev: netdevice
* @xdp: XDP command
**/
static int i40e_xdp(struct net_device *dev,
- struct netdev_xdp *xdp)
+ struct netdev_bpf *xdp)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
.ndo_features_check = i40e_features_check,
.ndo_bridge_getlink = i40e_ndo_bridge_getlink,
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
- .ndo_xdp = i40e_xdp,
+ .ndo_bpf = i40e_xdp,
};
/**
netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
- netdev->hw_features |= NETIF_F_NTUPLE;
+ netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+
hw_features = hw_enc_features |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX;
enabled_tc = i40e_pf_get_tc_map(pf);
+ /* Source pruning is enabled by default, so the flag is
+ * negative logic - if it's set, we need to fiddle with
+ * the VSI to disable source pruning.
+ */
+ if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
+ memset(&ctxt, 0, sizeof(ctxt));
+ ctxt.seid = pf->main_vsi_seid;
+ ctxt.pf_num = pf->hw.pf_id;
+ ctxt.vf_num = 0;
+ ctxt.info.valid_sections |=
+ cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+ ctxt.info.switch_id =
+ cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "update vsi failed, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ ret = -ENOENT;
+ goto err;
+ }
+ }
+
/* MFP mode setup queue map and update VSI */
if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
!(pf->hw.func_caps.iscsi)) { /* NIC type PF */
*/
if ((pf->hw.pf_id == 0) &&
- !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
+ !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+ pf->last_sw_conf_flags = flags;
+ }
if (pf->hw.pf_id == 0) {
u16 valid_flags;
valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
- ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags,
+ ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
NULL);
if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
dev_info(&pf->pdev->dev,
pf->hw.aq.asq_last_status));
/* not a fatal problem, just keep going */
}
+ pf->last_sw_conf_valid_flags = valid_flags;
}
/* first time setup */
vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
if (!vsi) {
dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
+ i40e_cloud_filter_exit(pf);
i40e_fdir_teardown(pf);
return -EAGAIN;
}
static void i40e_determine_queue_usage(struct i40e_pf *pf)
{
int queues_left;
+ int q_max;
pf->num_lan_qps = 0;
I40E_FLAG_DCB_ENABLED |
I40E_FLAG_SRIOV_ENABLED |
I40E_FLAG_VMDQ_ENABLED);
+ pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
} else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
I40E_FLAG_FD_SB_ENABLED |
I40E_FLAG_FD_ATR_ENABLED |
I40E_FLAG_FD_ATR_ENABLED |
I40E_FLAG_DCB_ENABLED |
I40E_FLAG_VMDQ_ENABLED);
+ pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
} else {
/* Not enough queues for all TCs */
if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
I40E_FLAG_DCB_ENABLED);
dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
}
- pf->num_lan_qps = max_t(int, pf->rss_size_max,
- num_online_cpus());
- pf->num_lan_qps = min_t(int, pf->num_lan_qps,
- pf->hw.func_caps.num_tx_qp);
+
+ /* limit lan qps to the smaller of qps, cpus or msix */
+ q_max = max_t(int, pf->rss_size_max, num_online_cpus());
+ q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
+ q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
+ pf->num_lan_qps = q_max;
queues_left -= pf->num_lan_qps;
}
queues_left -= 1; /* save 1 queue for FD */
} else {
pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+ pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
}
}
hw->bus.bus_id = pdev->bus->number;
pf->instance = pfs_found;
+ /* Select something other than the 802.1ad ethertype for the
+ * switch to use internally and drop on ingress.
+ */
+ hw->switch_tag = 0xffff;
+ hw->first_tag = ETH_P_8021AD;
+ hw->second_tag = ETH_P_8021Q;
+
INIT_LIST_HEAD(&pf->l3_flex_pit_list);
INIT_LIST_HEAD(&pf->l4_flex_pit_list);
i40e_nvm_version_str(hw));
if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
- hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR)
+ hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
dev_info(&pdev->dev,
"The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
- else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR ||
- hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1))
+ else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
dev_info(&pdev->dev,
"The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
i40e_clear_pxe_mode(hw);
- err = i40e_get_capabilities(pf);
+ err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
if (err)
goto err_adminq_setup;
#endif /* CONFIG_I40E_DCB */
/* set up periodic task facility */
- setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf);
+ timer_setup(&pf->service_timer, i40e_service_timer, 0);
pf->service_timer_period = HZ;
INIT_WORK(&pf->service_task, i40e_service_task);
dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
goto err_vsis;
}
+ INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
/* Make sure flow control is set according to current settings */
err = i40e_set_fc(hw, &set_fc_aq_fail, true);
/* no more scheduling of any task */
set_bit(__I40E_SUSPENDED, pf->state);
set_bit(__I40E_DOWN, pf->state);
- if (pf->service_timer.data)
+ if (pf->service_timer.function)
del_timer_sync(&pf->service_timer);
if (pf->service_task.func)
cancel_work_sync(&pf->service_task);
if (pf->vsi[pf->lan_vsi])
i40e_vsi_release(pf->vsi[pf->lan_vsi]);
+ i40e_cloud_filter_exit(pf);
+
/* remove attached clients */
if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
ret_code = i40e_lan_del_device(pf);
return result;
}
+ /**
+ * i40e_pci_error_reset_prepare - prepare device driver for pci reset
+ * @pdev: PCI device information struct
+ */
+ static void i40e_pci_error_reset_prepare(struct pci_dev *pdev)
+ {
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+ i40e_prep_for_reset(pf, false);
+ }
+
+ /**
+ * i40e_pci_error_reset_done - pci reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+ static void i40e_pci_error_reset_done(struct pci_dev *pdev)
+ {
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+ i40e_reset_and_rebuild(pf, false, false);
+ }
+
/**
* i40e_pci_error_resume - restart operations after PCI error recovery
* @pdev: PCI device information struct
del_timer_sync(&pf->service_timer);
cancel_work_sync(&pf->service_task);
+ i40e_cloud_filter_exit(pf);
i40e_fdir_teardown(pf);
/* Client close must be called explicitly here because the timer
}
}
- #ifdef CONFIG_PM
/**
- * i40e_suspend - PCI callback for moving to D3
- * @pdev: PCI device information struct
+ * i40e_suspend - PM callback for moving to D3
+ * @dev: generic device information structure
**/
- static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
+ static int __maybe_unused i40e_suspend(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct i40e_pf *pf = pci_get_drvdata(pdev);
struct i40e_hw *hw = &pf->hw;
- int retval = 0;
- set_bit(__I40E_SUSPENDED, pf->state);
+ /* If we're already suspended, then there is nothing to do */
+ if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
+ return 0;
+
set_bit(__I40E_DOWN, pf->state);
+ /* Ensure service task will not be running */
+ del_timer_sync(&pf->service_timer);
+ cancel_work_sync(&pf->service_task);
+
if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
i40e_enable_mc_magic_wake(pf);
wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
- i40e_stop_misc_vector(pf);
- if (pf->msix_entries) {
- synchronize_irq(pf->msix_entries[0].vector);
- free_irq(pf->msix_entries[0].vector, pf);
- }
- retval = pci_save_state(pdev);
- if (retval)
- return retval;
-
- pci_wake_from_d3(pdev, pf->wol_en);
- pci_set_power_state(pdev, PCI_D3hot);
+ /* Clear the interrupt scheme and release our IRQs so that the system
+ * can safely hibernate even when there are a large number of CPUs.
+ * Otherwise hibernation might fail when mapping all the vectors back
+ * to CPU0.
+ */
+ i40e_clear_interrupt_scheme(pf);
- return retval;
+ return 0;
}
/**
- * i40e_resume - PCI callback for waking up from D3
- * @pdev: PCI device information struct
+ * i40e_resume - PM callback for waking up from D3
+ * @dev: generic device information structure
**/
- static int i40e_resume(struct pci_dev *pdev)
+ static int __maybe_unused i40e_resume(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct i40e_pf *pf = pci_get_drvdata(pdev);
- u32 err;
+ int err;
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- /* pci_restore_state() clears dev->state_saves, so
- * call pci_save_state() again to restore it.
- */
- pci_save_state(pdev);
+ /* If we're not suspended, then there is nothing to do */
+ if (!test_bit(__I40E_SUSPENDED, pf->state))
+ return 0;
- err = pci_enable_device_mem(pdev);
+ /* We cleared the interrupt scheme when we suspended, so we need to
+ * restore it now to resume device functionality.
+ */
+ err = i40e_restore_interrupt_scheme(pf);
if (err) {
- dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
- return err;
+ dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n",
+ err);
}
- pci_set_master(pdev);
- /* no wakeup events while running */
- pci_wake_from_d3(pdev, false);
-
- /* handling the reset will rebuild the device state */
- if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
- clear_bit(__I40E_DOWN, pf->state);
- if (pf->msix_entries) {
- err = request_irq(pf->msix_entries[0].vector,
- i40e_intr, 0, pf->int_name, pf);
- if (err) {
- dev_err(&pf->pdev->dev,
- "request_irq for %s failed: %d\n",
- pf->int_name, err);
- }
- }
- i40e_reset_and_rebuild(pf, false, false);
- }
+ clear_bit(__I40E_DOWN, pf->state);
+ i40e_reset_and_rebuild(pf, false, false);
+
+ /* Clear suspended state last after everything is recovered */
+ clear_bit(__I40E_SUSPENDED, pf->state);
+
+ /* Restart the service task */
+ mod_timer(&pf->service_timer,
+ round_jiffies(jiffies + pf->service_timer_period));
return 0;
}
- #endif
static const struct pci_error_handlers i40e_err_handler = {
.error_detected = i40e_pci_error_detected,
.slot_reset = i40e_pci_error_slot_reset,
+ .reset_prepare = i40e_pci_error_reset_prepare,
+ .reset_done = i40e_pci_error_reset_done,
.resume = i40e_pci_error_resume,
};
+ static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume);
+
static struct pci_driver i40e_driver = {
.name = i40e_driver_name,
.id_table = i40e_pci_tbl,
.probe = i40e_probe,
.remove = i40e_remove,
- #ifdef CONFIG_PM
- .suspend = i40e_suspend,
- .resume = i40e_resume,
- #endif
+ .driver = {
+ .pm = &i40e_pm_ops,
+ },
.shutdown = i40e_shutdown,
.err_handler = &i40e_err_handler,
.sriov_configure = i40e_pci_sriov_configure,
/* write operations, indexed using DWORDS */
#define wr32(reg, val) \
do { \
- u8 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \
+ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
if (!E1000_REMOVED(hw_addr)) \
writel((val), &hw_addr[(reg)]); \
} while (0)
#define E1000_I210_FLA 0x1201C
+ #define E1000_I210_DTXMXPKTSZ 0x355C
+
+ #define E1000_I210_TXDCTL(_n) (0x0E028 + ((_n) * 0x40))
+
+ #define E1000_I210_TQAVCTRL 0x3570
+ #define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40))
+ #define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40))
+
#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n))
#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
+ #include <net/pkt_sched.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#define BUILD 0
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
+
+ enum queue_mode {
+ QUEUE_MODE_STRICT_PRIORITY,
+ QUEUE_MODE_STREAM_RESERVATION,
+ };
+
+ enum tx_queue_prio {
+ TX_QUEUE_PRIO_HIGH,
+ TX_QUEUE_PRIO_LOW,
+ };
+
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
- static void igb_update_phy_info(unsigned long);
- static void igb_watchdog(unsigned long);
+ static void igb_update_phy_info(struct timer_list *);
+ static void igb_watchdog(struct timer_list *);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static void igb_get_stats64(struct net_device *dev,
u32 igb_rd32(struct e1000_hw *hw, u32 reg)
{
struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw);
- u8 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr);
+ u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
u32 value = 0;
if (E1000_REMOVED(hw_addr))
ring->count = adapter->tx_ring_count;
ring->queue_index = txr_idx;
+ ring->cbs_enable = false;
+ ring->idleslope = 0;
+ ring->sendslope = 0;
+ ring->hicredit = 0;
+ ring->locredit = 0;
+
u64_stats_init(&ring->tx_syncp);
u64_stats_init(&ring->tx_syncp2);
ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
+ static void enable_fqtss(struct igb_adapter *adapter, bool enable)
+ {
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_hw *hw = &adapter->hw;
+
+ WARN_ON(hw->mac.type != e1000_i210);
+
+ if (enable)
+ adapter->flags |= IGB_FLAG_FQTSS;
+ else
+ adapter->flags &= ~IGB_FLAG_FQTSS;
+
+ if (netif_running(netdev))
+ schedule_work(&adapter->reset_task);
+ }
+
+ static bool is_fqtss_enabled(struct igb_adapter *adapter)
+ {
+ return (adapter->flags & IGB_FLAG_FQTSS) ? true : false;
+ }
+
+ static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue,
+ enum tx_queue_prio prio)
+ {
+ u32 val;
+
+ WARN_ON(hw->mac.type != e1000_i210);
+ WARN_ON(queue < 0 || queue > 4);
+
+ val = rd32(E1000_I210_TXDCTL(queue));
+
+ if (prio == TX_QUEUE_PRIO_HIGH)
+ val |= E1000_TXDCTL_PRIORITY;
+ else
+ val &= ~E1000_TXDCTL_PRIORITY;
+
+ wr32(E1000_I210_TXDCTL(queue), val);
+ }
+
+ static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode)
+ {
+ u32 val;
+
+ WARN_ON(hw->mac.type != e1000_i210);
+ WARN_ON(queue < 0 || queue > 1);
+
+ val = rd32(E1000_I210_TQAVCC(queue));
+
+ if (mode == QUEUE_MODE_STREAM_RESERVATION)
+ val |= E1000_TQAVCC_QUEUEMODE;
+ else
+ val &= ~E1000_TQAVCC_QUEUEMODE;
+
+ wr32(E1000_I210_TQAVCC(queue), val);
+ }
+
+ /**
+ * igb_configure_cbs - Configure Credit-Based Shaper (CBS)
+ * @adapter: pointer to adapter struct
+ * @queue: queue number
+ * @enable: true = enable CBS, false = disable CBS
+ * @idleslope: idleSlope in kbps
+ * @sendslope: sendSlope in kbps
+ * @hicredit: hiCredit in bytes
+ * @locredit: loCredit in bytes
+ *
+ * Configure CBS for a given hardware queue. When disabling, idleslope,
+ * sendslope, hicredit, locredit arguments are ignored. Returns 0 if
+ * success. Negative otherwise.
+ **/
+ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
+ bool enable, int idleslope, int sendslope,
+ int hicredit, int locredit)
+ {
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 tqavcc;
+ u16 value;
+
+ WARN_ON(hw->mac.type != e1000_i210);
+ WARN_ON(queue < 0 || queue > 1);
+
+ if (enable) {
+ set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
+ set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
+
+ /* According to i210 datasheet section 7.2.7.7, we should set
+ * the 'idleSlope' field from TQAVCC register following the
+ * equation:
+ *
+ * For 100 Mbps link speed:
+ *
+ * value = BW * 0x7735 * 0.2 (E1)
+ *
+ * For 1000Mbps link speed:
+ *
+ * value = BW * 0x7735 * 2 (E2)
+ *
+ * E1 and E2 can be merged into one equation as shown below.
+ * Note that 'link-speed' is in Mbps.
+ *
+ * value = BW * 0x7735 * 2 * link-speed
+ * -------------- (E3)
+ * 1000
+ *
+ * 'BW' is the percentage bandwidth out of full link speed
+ * which can be found with the following equation. Note that
+ * idleSlope here is the parameter from this function which
+ * is in kbps.
+ *
+ * BW = idleSlope
+ * ----------------- (E4)
+ * link-speed * 1000
+ *
+ * That said, we can come up with a generic equation to
+ * calculate the value we should set it TQAVCC register by
+ * replacing 'BW' in E3 by E4. The resulting equation is:
+ *
+ * value = idleSlope * 0x7735 * 2 * link-speed
+ * ----------------- -------------- (E5)
+ * link-speed * 1000 1000
+ *
+ * 'link-speed' is present in both sides of the fraction so
+ * it is canceled out. The final equation is the following:
+ *
+ * value = idleSlope * 61034
+ * ----------------- (E6)
+ * 1000000
+ */
+ value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
+
+ tqavcc = rd32(E1000_I210_TQAVCC(queue));
+ tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+ tqavcc |= value;
+ wr32(E1000_I210_TQAVCC(queue), tqavcc);
+
+ wr32(E1000_I210_TQAVHC(queue), 0x80000000 + hicredit * 0x7735);
+ } else {
+ set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW);
+ set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY);
+
+ /* Set idleSlope to zero. */
+ tqavcc = rd32(E1000_I210_TQAVCC(queue));
+ tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+ wr32(E1000_I210_TQAVCC(queue), tqavcc);
+
+ /* Set hiCredit to zero. */
+ wr32(E1000_I210_TQAVHC(queue), 0);
+ }
+
+ /* XXX: In i210 controller the sendSlope and loCredit parameters from
+ * CBS are not configurable by software so we don't do any 'controller
+ * configuration' in respect to these parameters.
+ */
+
+ netdev_dbg(netdev, "CBS %s: queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+ (enable) ? "enabled" : "disabled", queue,
+ idleslope, sendslope, hicredit, locredit);
+ }
+
+ static int igb_save_cbs_params(struct igb_adapter *adapter, int queue,
+ bool enable, int idleslope, int sendslope,
+ int hicredit, int locredit)
+ {
+ struct igb_ring *ring;
+
+ if (queue < 0 || queue > adapter->num_tx_queues)
+ return -EINVAL;
+
+ ring = adapter->tx_ring[queue];
+
+ ring->cbs_enable = enable;
+ ring->idleslope = idleslope;
+ ring->sendslope = sendslope;
+ ring->hicredit = hicredit;
+ ring->locredit = locredit;
+
+ return 0;
+ }
+
+ static bool is_any_cbs_enabled(struct igb_adapter *adapter)
+ {
+ struct igb_ring *ring;
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ ring = adapter->tx_ring[i];
+
+ if (ring->cbs_enable)
+ return true;
+ }
+
+ return false;
+ }
+
+ static void igb_setup_tx_mode(struct igb_adapter *adapter)
+ {
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 val;
+
+ /* Only i210 controller supports changing the transmission mode. */
+ if (hw->mac.type != e1000_i210)
+ return;
+
+ if (is_fqtss_enabled(adapter)) {
+ int i, max_queue;
+
+ /* Configure TQAVCTRL register: set transmit mode to 'Qav',
+ * set data fetch arbitration to 'round robin' and set data
+ * transfer arbitration to 'credit shaper algorithm.
+ */
+ val = rd32(E1000_I210_TQAVCTRL);
+ val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_DATATRANARB;
+ val &= ~E1000_TQAVCTRL_DATAFETCHARB;
+ wr32(E1000_I210_TQAVCTRL, val);
+
+ /* Configure Tx and Rx packet buffers sizes as described in
+ * i210 datasheet section 7.2.7.7.
+ */
+ val = rd32(E1000_TXPBS);
+ val &= ~I210_TXPBSIZE_MASK;
+ val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB |
+ I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB;
+ wr32(E1000_TXPBS, val);
+
+ val = rd32(E1000_RXPBS);
+ val &= ~I210_RXPBSIZE_MASK;
+ val |= I210_RXPBSIZE_PB_32KB;
+ wr32(E1000_RXPBS, val);
+
+ /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ
+ * register should not exceed the buffer size programmed in
+ * TXPBS. The smallest buffer size programmed in TXPBS is 4kB
+ * so according to the datasheet we should set MAX_TPKT_SIZE to
+ * 4kB / 64.
+ *
+ * However, when we do so, no frame from queue 2 and 3 are
+ * transmitted. It seems the MAX_TPKT_SIZE should not be great
+ * or _equal_ to the buffer size programmed in TXPBS. For this
+ * reason, we set set MAX_ TPKT_SIZE to (4kB - 1) / 64.
+ */
+ val = (4096 - 1) / 64;
+ wr32(E1000_I210_DTXMXPKTSZ, val);
+
+ /* Since FQTSS mode is enabled, apply any CBS configuration
+ * previously set. If no previous CBS configuration has been
+ * done, then the initial configuration is applied, which means
+ * CBS is disabled.
+ */
+ max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ?
+ adapter->num_tx_queues : I210_SR_QUEUES_NUM;
+
+ for (i = 0; i < max_queue; i++) {
+ struct igb_ring *ring = adapter->tx_ring[i];
+
+ igb_configure_cbs(adapter, i, ring->cbs_enable,
+ ring->idleslope, ring->sendslope,
+ ring->hicredit, ring->locredit);
+ }
+ } else {
+ wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
+ wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
+ wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT);
+
+ val = rd32(E1000_I210_TQAVCTRL);
+ /* According to Section 8.12.21, the other flags we've set when
+ * enabling FQTSS are not relevant when disabling FQTSS so we
+ * don't set they here.
+ */
+ val &= ~E1000_TQAVCTRL_XMIT_MODE;
+ wr32(E1000_I210_TQAVCTRL, val);
+ }
+
+ netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ?
+ "enabled" : "disabled");
+ }
+
/**
* igb_configure - configure the hardware for RX and TX
* @adapter: private board structure
igb_get_hw_control(adapter);
igb_set_rx_mode(netdev);
+ igb_setup_tx_mode(adapter);
igb_restore_vlan(adapter);
return features;
}
+ static int igb_offload_cbs(struct igb_adapter *adapter,
+ struct tc_cbs_qopt_offload *qopt)
+ {
+ struct e1000_hw *hw = &adapter->hw;
+ int err;
+
+ /* CBS offloading is only supported by i210 controller. */
+ if (hw->mac.type != e1000_i210)
+ return -EOPNOTSUPP;
+
+ /* CBS offloading is only supported by queue 0 and queue 1. */
+ if (qopt->queue < 0 || qopt->queue > 1)
+ return -EINVAL;
+
+ err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable,
+ qopt->idleslope, qopt->sendslope,
+ qopt->hicredit, qopt->locredit);
+ if (err)
+ return err;
+
+ if (is_fqtss_enabled(adapter)) {
+ igb_configure_cbs(adapter, qopt->queue, qopt->enable,
+ qopt->idleslope, qopt->sendslope,
+ qopt->hicredit, qopt->locredit);
+
+ if (!is_any_cbs_enabled(adapter))
+ enable_fqtss(adapter, false);
+
+ } else {
+ enable_fqtss(adapter, true);
+ }
+
+ return 0;
+ }
+
+ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+ {
+ struct igb_adapter *adapter = netdev_priv(dev);
+
+ switch (type) {
+ case TC_SETUP_QDISC_CBS:
+ return igb_offload_cbs(adapter, type_data);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
static const struct net_device_ops igb_netdev_ops = {
.ndo_open = igb_open,
.ndo_stop = igb_close,
.ndo_set_features = igb_set_features,
.ndo_fdb_add = igb_ndo_fdb_add,
.ndo_features_check = igb_features_check,
+ .ndo_setup_tc = igb_setup_tc,
};
/**
wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
}
- setup_timer(&adapter->watchdog_timer, igb_watchdog,
- (unsigned long) adapter);
- setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
- (unsigned long) adapter);
+ timer_setup(&adapter->watchdog_timer, igb_watchdog, 0);
+ timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0);
INIT_WORK(&adapter->reset_task, igb_reset_task);
INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
/* Setup and initialize a copy of the hw vlan table array */
adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
GFP_ATOMIC);
+ if (!adapter->shadow_vfta)
+ return -ENOMEM;
/* This call may decrease the number of queues */
if (igb_init_interrupt_scheme(adapter, true)) {
/* Need to wait a few seconds after link up to get diagnostic information from
* the phy
*/
- static void igb_update_phy_info(unsigned long data)
+ static void igb_update_phy_info(struct timer_list *t)
{
- struct igb_adapter *adapter = (struct igb_adapter *) data;
+ struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
igb_get_phy_info(&adapter->hw);
}
* igb_watchdog - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
- static void igb_watchdog(unsigned long data)
+ static void igb_watchdog(struct timer_list *t)
{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
/* Do the rest outside of interrupt context */
schedule_work(&adapter->watchdog_task);
}
*/
u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
{
- u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr);
+ u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
u32 value;
if (ixgbe_removed(reg_addr))
bi->page = page;
bi->page_offset = ixgbe_rx_offset(rx_ring);
bi->pagecnt_bias = 1;
+ rx_ring->rx_stats.alloc_rx_page++;
return true;
}
#if L1_CACHE_BYTES < 128
prefetch(xdp->data + L1_CACHE_BYTES);
#endif
+ /* Note, we get here by enabling legacy-rx via:
+ *
+ * ethtool --set-priv-flags <dev> legacy-rx on
+ *
+ * In this mode, we currently get 0 extra XDP headroom as
+ * opposed to having legacy-rx off, where we process XDP
+ * packets going to stack via ixgbe_build_skb(). The latter
+ * provides us currently with 192 bytes of headroom.
+ *
+ * For ixgbe_construct_skb() mode it means that the
+ * xdp->data_meta will always point to xdp->data, since
+ * the helper cannot expand the head. Should this ever
+ * change in future for legacy-rx mode on, then lets also
+ * add xdp->data_meta handling here.
+ */
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE);
struct xdp_buff *xdp,
union ixgbe_adv_rx_desc *rx_desc)
{
+ unsigned int metasize = xdp->data - xdp->data_meta;
#if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
#else
#endif
struct sk_buff *skb;
- /* prefetch first cache line of first page */
- prefetch(xdp->data);
+ /* Prefetch first cache line of first page. If xdp->data_meta
+ * is unused, this points extactly as xdp->data, otherwise we
+ * likely have a consumer accessing first few bytes of meta
+ * data, and then actual data.
+ */
+ prefetch(xdp->data_meta);
#if L1_CACHE_BYTES < 128
- prefetch(xdp->data + L1_CACHE_BYTES);
+ prefetch(xdp->data_meta + L1_CACHE_BYTES);
#endif
/* build an skb to around the page buffer */
/* update pointers within the skb to store the data */
skb_reserve(skb, xdp->data - xdp->data_hard_start);
__skb_put(skb, xdp->data_end - xdp->data);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
/* record DMA address if this is the start of a chain of buffers */
if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
if (!skb) {
xdp.data = page_address(rx_buffer->page) +
rx_buffer->page_offset;
+ xdp.data_meta = xdp.data;
xdp.data_hard_start = xdp.data -
ixgbe_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
struct ixgbe_ring_container *ring_container)
{
- int bytes = ring_container->total_bytes;
- int packets = ring_container->total_packets;
- u32 timepassed_us;
- u64 bytes_perint;
- u8 itr_setting = ring_container->itr;
+ unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
+ IXGBE_ITR_ADAPTIVE_LATENCY;
+ unsigned int avg_wire_size, packets, bytes;
+ unsigned long next_update = jiffies;
- if (packets == 0)
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!ring_container->ring)
return;
- /* simple throttlerate management
- * 0-10MB/s lowest (100000 ints/s)
- * 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (12000 ints/s)
+ /* If we didn't update within up to 1 - 2 jiffies we can assume
+ * that either packets are coming in so slow there hasn't been
+ * any work, or that there is so much work that NAPI is dealing
+ * with interrupt moderation and we don't need to do anything.
*/
- /* what was last interrupt timeslice? */
- timepassed_us = q_vector->itr >> 2;
- if (timepassed_us == 0)
- return;
+ if (time_after(next_update, ring_container->next_update))
+ goto clear_counts;
- bytes_perint = bytes / timepassed_us; /* bytes/usec */
+ packets = ring_container->total_packets;
- switch (itr_setting) {
- case lowest_latency:
- if (bytes_perint > 10)
- itr_setting = low_latency;
- break;
- case low_latency:
- if (bytes_perint > 20)
- itr_setting = bulk_latency;
- else if (bytes_perint <= 10)
- itr_setting = lowest_latency;
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * When this occurs just tick up our delay by the minimum value
+ * and hope that this extra delay will prevent us from being called
+ * without any work on our queue.
+ */
+ if (!packets) {
+ itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+ if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+ itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+ itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
+ goto clear_counts;
+ }
+
+ bytes = ring_container->total_bytes;
+
+ /* If packets are less than 4 or bytes are less than 9000 assume
+ * insufficient data to use bulk rate limiting approach. We are
+ * likely latency driven.
+ */
+ if (packets < 4 && bytes < 9000) {
+ itr = IXGBE_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+
+ /* Between 4 and 48 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
+ */
+ if (packets < 48) {
+ itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+ if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+ itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+ goto clear_counts;
+ }
+
+ /* Between 48 and 96 is our "goldilocks" zone where we are working
+ * out "just right". Just report that our current ITR is good for us.
+ */
+ if (packets < 96) {
+ itr = q_vector->itr >> 2;
+ goto clear_counts;
+ }
+
+ /* If packet count is 96 or greater we are likely looking at a slight
+ * overrun of the delay we want. Try halving our delay to see if that
+ * will cut the number of packets in half per interrupt.
+ */
+ if (packets < 256) {
+ itr = q_vector->itr >> 3;
+ if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
+ itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
+ goto clear_counts;
+ }
+
+ /* The paths below assume we are dealing with a bulk ITR since number
+ * of packets is 256 or greater. We are just going to have to compute
+ * a value and try to bring the count under control, though for smaller
+ * packet sizes there isn't much we can do as NAPI polling will likely
+ * be kicking in sooner rather than later.
+ */
+ itr = IXGBE_ITR_ADAPTIVE_BULK;
+
+ adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to fine
+ * tune it just use the formula below to try and dial in an exact value
+ * give the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
+ */
+ if (avg_wire_size <= 60) {
+ /* Start at 50k ints/sec */
+ avg_wire_size = 5120;
+ } else if (avg_wire_size <= 316) {
+ /* 50K ints/sec to 16K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 2720;
+ } else if (avg_wire_size <= 1084) {
+ /* 16K ints/sec to 9.2K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 9.2K ints/sec to 8K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 8K ints/sec */
+ avg_wire_size = 32256;
+ }
+
+ /* If we are in low latency mode half our delay which doubles the rate
+ * to somewhere between 100K to 16K ints/sec
+ */
+ if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size >>= 1;
+
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ switch (q_vector->adapter->link_speed) {
+ case IXGBE_LINK_SPEED_10GB_FULL:
+ case IXGBE_LINK_SPEED_100_FULL:
+ default:
+ itr += DIV_ROUND_UP(avg_wire_size,
+ IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
+ IXGBE_ITR_ADAPTIVE_MIN_INC;
break;
- case bulk_latency:
- if (bytes_perint <= 20)
- itr_setting = low_latency;
+ case IXGBE_LINK_SPEED_2_5GB_FULL:
+ case IXGBE_LINK_SPEED_1GB_FULL:
+ case IXGBE_LINK_SPEED_10_FULL:
+ itr += DIV_ROUND_UP(avg_wire_size,
+ IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
+ IXGBE_ITR_ADAPTIVE_MIN_INC;
break;
}
- /* clear work counters since we have the values we need */
+ clear_counts:
+ /* write back value */
+ ring_container->itr = itr;
+
+ /* next update should occur within next jiffy */
+ ring_container->next_update = next_update + 1;
+
ring_container->total_bytes = 0;
ring_container->total_packets = 0;
-
- /* write updated itr to ring container */
- ring_container->itr = itr_setting;
}
/**
static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
{
- u32 new_itr = q_vector->itr;
- u8 current_itr;
+ u32 new_itr;
ixgbe_update_itr(q_vector, &q_vector->tx);
ixgbe_update_itr(q_vector, &q_vector->rx);
- current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+ /* use the smallest value of new ITR delay calculations */
+ new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
- switch (current_itr) {
- /* counts and packets in update_itr are dependent on these numbers */
- case lowest_latency:
- new_itr = IXGBE_100K_ITR;
- break;
- case low_latency:
- new_itr = IXGBE_20K_ITR;
- break;
- case bulk_latency:
- new_itr = IXGBE_12K_ITR;
- break;
- default:
- break;
- }
+ /* Clear latency flag if set, shift into correct position */
+ new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
+ new_itr <<= 2;
if (new_itr != q_vector->itr) {
- /* do an exponential smoothing */
- new_itr = (10 * new_itr * q_vector->itr) /
- ((9 * new_itr) + q_vector->itr);
-
/* save the algorithm value here */
q_vector->itr = new_itr;
u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+ u64 alloc_rx_page = 0;
u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
if (test_bit(__IXGBE_DOWN, &adapter->state) ||
for (i = 0; i < adapter->num_rx_queues; i++) {
struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+ alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
hw_csum_rx_error += rx_ring->rx_stats.csum_err;
packets += rx_ring->stats.packets;
}
adapter->non_eop_descs = non_eop_descs;
+ adapter->alloc_rx_page = alloc_rx_page;
adapter->alloc_rx_page_failed = alloc_rx_page_failed;
adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
adapter->hw_csum_rx_error = hw_csum_rx_error;
* ixgbe_service_timer - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
- static void ixgbe_service_timer(unsigned long data)
+ static void ixgbe_service_timer(struct timer_list *t)
{
- struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data;
+ struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer);
unsigned long next_event_offset;
/* poll faster when waiting for link */
rcu_read_lock();
for (i = 0; i < adapter->num_rx_queues; i++) {
- struct ixgbe_ring *ring = ACCESS_ONCE(adapter->rx_ring[i]);
+ struct ixgbe_ring *ring = READ_ONCE(adapter->rx_ring[i]);
u64 bytes, packets;
unsigned int start;
}
for (i = 0; i < adapter->num_tx_queues; i++) {
- struct ixgbe_ring *ring = ACCESS_ONCE(adapter->tx_ring[i]);
+ struct ixgbe_ring *ring = READ_ONCE(adapter->tx_ring[i]);
ixgbe_get_ring_stats64(stats, ring);
}
for (i = 0; i < adapter->num_xdp_queues; i++) {
- struct ixgbe_ring *ring = ACCESS_ONCE(adapter->xdp_ring[i]);
+ struct ixgbe_ring *ring = READ_ONCE(adapter->xdp_ring[i]);
ixgbe_get_ring_stats64(stats, ring);
}
return err;
}
- static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
+ static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
struct tc_cls_u32_offload *cls_u32)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
-
- if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
- cls_u32->common.chain_index)
+ if (cls_u32->common.chain_index)
return -EOPNOTSUPP;
switch (cls_u32->command) {
}
}
+ static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+ {
+ struct ixgbe_adapter *adapter = cb_priv;
+
+ if (!tc_can_offload(adapter->netdev))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSU32:
+ return ixgbe_setup_tc_cls_u32(adapter, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ static int ixgbe_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+ {
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
+ adapter, adapter);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
+ adapter);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
static int ixgbe_setup_tc_mqprio(struct net_device *dev,
struct tc_mqprio_qopt *mqprio)
{
void *type_data)
{
switch (type) {
- case TC_SETUP_CLSU32:
- return ixgbe_setup_tc_cls_u32(dev, type_data);
- case TC_SETUP_MQPRIO:
+ case TC_SETUP_BLOCK:
+ return ixgbe_setup_tc_block(dev, type_data);
+ case TC_SETUP_QDISC_MQPRIO:
return ixgbe_setup_tc_mqprio(dev, type_data);
default:
return -EOPNOTSUPP;
limit = find_last_bit(&adapter->fwd_bitmask, 32);
adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter);
+
+ /* go back to full RSS if we're done with our VMQs */
+ if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+ int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+ num_online_cpus());
+
+ adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
+ adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
+ adapter->ring_feature[RING_F_RSS].limit = rss;
+ }
+
ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev));
netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n",
fwd_adapter->pool, adapter->num_rx_pools,
return 0;
}
- static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
.ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port,
.ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port,
.ndo_features_check = ixgbe_features_check,
- .ndo_xdp = ixgbe_xdp,
+ .ndo_bpf = ixgbe_xdp,
.ndo_xdp_xmit = ixgbe_xdp_xmit,
.ndo_xdp_flush = ixgbe_xdp_flush,
};
ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
ixgbe_mac_set_default_filter(adapter);
- setup_timer(&adapter->service_timer, &ixgbe_service_timer,
- (unsigned long) adapter);
+ timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
if (ixgbe_removed(hw->hw_addr)) {
err = -EIO;
#endif /* CONFIG_PCI_IOV */
if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
return PCI_ERS_RESULT_DISCONNECT;
+
+ if (!netif_device_present(netdev))
+ return PCI_ERS_RESULT_DISCONNECT;
rtnl_lock();
netif_device_detach(netdev);
u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg)
{
- u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr);
+ u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
u32 value;
if (IXGBE_REMOVED(reg_addr))
* ixgbevf_service_timer - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
- static void ixgbevf_service_timer(unsigned long data)
+ static void ixgbevf_service_timer(struct timer_list *t)
{
- struct ixgbevf_adapter *adapter = (struct ixgbevf_adapter *)data;
+ struct ixgbevf_adapter *adapter = from_timer(adapter, t,
+ service_timer);
/* Reset the timer */
mod_timer(&adapter->service_timer, (HZ * 2) + jiffies);
goto err_sw_init;
}
- setup_timer(&adapter->service_timer, &ixgbevf_service_timer,
- (unsigned long)adapter);
+ timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0);
INIT_WORK(&adapter->service_task, ixgbevf_service_task);
set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state);
index = cons_index & size_mask;
cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
- last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb);
- ring_cons = ACCESS_ONCE(ring->cons);
+ last_nr_txbb = READ_ONCE(ring->last_nr_txbb);
+ ring_cons = READ_ONCE(ring->cons);
ring_index = ring_cons & size_mask;
stamp_index = ring_index;
wmb();
/* we want to dirty this cache line once */
- ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
- ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
+ WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb);
+ WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
if (cq->type == TX_XDP)
return done < budget;
#else
iowrite32be(
#endif
- ring->doorbell_qpn,
+ (__force u32)ring->doorbell_qpn,
ring->bf.uar->map + MLX4_SEND_DOORBELL);
}
goto tx_drop;
/* fetch ring->cons far ahead before needing it to avoid stall */
- ring_cons = ACCESS_ONCE(ring->cons);
+ ring_cons = READ_ONCE(ring->cons);
real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
&inline_ok, &fragptr);
*/
smp_rmb();
- ring_cons = ACCESS_ONCE(ring->cons);
+ ring_cons = READ_ONCE(ring->cons);
if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
netif_tx_wake_queue(ring->tx_queue);
ring->wake_queue++;
#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
/ 16) & 0x3f)
+ void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring)
+ {
+ int i;
+
+ for (i = 0; i < ring->size; i++) {
+ struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
+ struct mlx4_en_tx_desc *tx_desc = ring->buf +
+ (i << LOG_TXBB_SIZE);
+
+ tx_info->map0_byte_count = PAGE_SIZE;
+ tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
+ tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
+ tx_info->ts_requested = 0;
+ tx_info->nr_maps = 1;
+ tx_info->linear = 1;
+ tx_info->inl = 0;
+
+ tx_desc->data.lkey = ring->mr_key;
+ tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+ tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+ }
+ }
+
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
struct mlx4_en_rx_alloc *frame,
- struct net_device *dev, unsigned int length,
+ struct mlx4_en_priv *priv, unsigned int length,
int tx_ind, bool *doorbell_pending)
{
- struct mlx4_en_priv *priv = netdev_priv(dev);
- union mlx4_wqe_qpn_vlan qpn_vlan = {};
struct mlx4_en_tx_desc *tx_desc;
struct mlx4_en_tx_info *tx_info;
struct mlx4_wqe_data_seg *data;
tx_info->page = frame->page;
frame->page = NULL;
tx_info->map0_dma = dma;
- tx_info->map0_byte_count = PAGE_SIZE;
- tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
- tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
- tx_info->ts_requested = 0;
- tx_info->nr_maps = 1;
- tx_info->linear = 1;
- tx_info->inl = 0;
dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
length, PCI_DMA_TODEVICE);
data->addr = cpu_to_be64(dma + frame->page_offset);
- data->lkey = ring->mr_key;
dma_wmb();
data->byte_count = cpu_to_be32(length);
/* tx completion can avoid cache line miss for common cases */
- tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
((ring->prod & ring->size) ?
ring->prod += MLX4_EN_XDP_TX_NRTXBB;
- qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+ /* Ensure new descriptor hits memory
+ * before setting ownership of this descriptor to HW
+ */
+ dma_wmb();
+ tx_desc->ctrl.owner_opcode = op_own;
+ ring->xmit_more++;
- mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
- op_own, false, false);
*doorbell_pending = true;
return NETDEV_TX_OK;
struct netdev_hw_addr *ha;
struct vxgedev *vdev;
int i, mcast_cnt = 0;
- struct __vxge_hw_device *hldev;
struct vxge_vpath *vpath;
enum vxge_hw_status status = VXGE_HW_OK;
struct macInfo mac_info;
"%s:%d", __func__, __LINE__);
vdev = netdev_priv(dev);
- hldev = vdev->devh;
if (unlikely(!is_vxge_card_up(vdev)))
return;
{
struct sockaddr *addr = p;
struct vxgedev *vdev;
- struct __vxge_hw_device *hldev;
enum vxge_hw_status status = VXGE_HW_OK;
struct macInfo mac_info_new, mac_info_old;
int vpath_idx = 0;
vxge_debug_entryexit(VXGE_TRACE, "%s:%d", __func__, __LINE__);
vdev = netdev_priv(dev);
- hldev = vdev->devh;
if (!is_valid_ether_addr(addr->sa_data))
return -EINVAL;
*/
static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
{
- struct net_device *dev;
struct __vxge_hw_device *hldev;
u64 reason;
enum vxge_hw_status status;
vxge_debug_intr(VXGE_TRACE, "%s:%d", __func__, __LINE__);
- dev = vdev->ndev;
hldev = pci_get_drvdata(vdev->pdev);
if (pci_channel_offline(vdev->pdev))
return VXGE_HW_OK;
}
- static void vxge_poll_vp_reset(unsigned long data)
+ static void vxge_poll_vp_reset(struct timer_list *t)
{
- struct vxgedev *vdev = (struct vxgedev *)data;
+ struct vxgedev *vdev = from_timer(vdev, t, vp_reset_timer);
int i, j = 0;
for (i = 0; i < vdev->no_of_vpath; i++) {
mod_timer(&vdev->vp_reset_timer, jiffies + HZ / 2);
}
- static void vxge_poll_vp_lockup(unsigned long data)
+ static void vxge_poll_vp_lockup(struct timer_list *t)
{
- struct vxgedev *vdev = (struct vxgedev *)data;
+ struct vxgedev *vdev = from_timer(vdev, t, vp_lockup_timer);
enum vxge_hw_status status = VXGE_HW_OK;
struct vxge_vpath *vpath;
struct vxge_ring *ring;
ring = &vdev->vpaths[i].ring;
/* Truncated to machine word size number of frames */
- rx_frms = ACCESS_ONCE(ring->stats.rx_frms);
+ rx_frms = READ_ONCE(ring->stats.rx_frms);
/* Did this vpath received any packets */
if (ring->stats.prev_rx_frms == rx_frms) {
struct vxge_vpath *vpath;
int ret = 0;
int i;
- u64 val64, function_mode;
+ u64 val64;
vxge_debug_entryexit(VXGE_TRACE,
"%s: %s:%d", dev->name, __func__, __LINE__);
vdev = netdev_priv(dev);
hldev = pci_get_drvdata(vdev->pdev);
- function_mode = vdev->config.device_hw_info.function_mode;
/* make sure you have link off by default every time Nic is
* initialized */
vdev->config.rx_pause_enable);
if (vdev->vp_reset_timer.function == NULL)
- vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, vdev,
+ vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset,
HZ / 2);
/* There is no need to check for RxD leak and RxD lookup on Titan1A */
if (vdev->titan1 && vdev->vp_lockup_timer.function == NULL)
- vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, vdev,
+ vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup,
HZ / 2);
set_bit(__VXGE_STATE_CARD_UP, &vdev->state);
efx->rx_packet_len_offset =
ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE;
+ if (nic_data->datapath_caps &
+ (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_INCLUDE_FCS_LBN))
+ efx->net_dev->hw_features |= NETIF_F_RXFCS;
+
rc = efx_mcdi_port_get_number(efx);
if (rc < 0)
goto fail5;
netif_vdbg(efx, intr, efx->net_dev,
"IRQ %d on CPU %d\n", irq, raw_smp_processor_id());
- if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) {
+ if (likely(READ_ONCE(efx->irq_soft_enabled))) {
/* Note test interrupts */
if (context->index == efx->irq_level)
efx->last_irq_cpu = raw_smp_processor_id();
static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id)
{
struct efx_nic *efx = dev_id;
- bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
+ bool soft_enabled = READ_ONCE(efx->irq_soft_enabled);
struct efx_channel *channel;
efx_dword_t reg;
u32 queues;
const efx_qword_t *event)
{
struct efx_nic *efx = channel->efx;
+ bool handled = false;
if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)) {
- if (!efx->loopback_selftest)
- channel->n_rx_eth_crc_err += n_packets;
- return EFX_RX_PKT_DISCARD;
+ if (!(efx->net_dev->features & NETIF_F_RXALL)) {
+ if (!efx->loopback_selftest)
+ channel->n_rx_eth_crc_err += n_packets;
+ return EFX_RX_PKT_DISCARD;
+ }
+ handled = true;
}
if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR)) {
if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN &&
return 0;
}
- WARN_ON(1); /* No error bits were recognised */
+ WARN_ON(!handled); /* No error bits were recognised */
return 0;
}
bool rx_cont;
u16 flags = 0;
- if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+ if (unlikely(READ_ONCE(efx->reset_pending)))
return 0;
/* Basic packet information */
unsigned int tx_ev_q_label;
int tx_descs = 0;
- if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+ if (unlikely(READ_ONCE(efx->reset_pending)))
return 0;
if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT)))
int i;
for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
- if (ACCESS_ONCE(table->entry[i].spec) &
+ if (READ_ONCE(table->entry[i].spec) &
EFX_EF10_FILTER_FLAG_AUTO_OLD) {
rc = efx_ef10_filter_remove_internal(efx,
1U << EFX_FILTER_PRI_AUTO, i, true);
* MCFW do not support VFs.
*/
rc = efx_ef10_vport_set_mac_address(efx);
- } else {
+ } else if (rc) {
efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC,
sizeof(inbuf), NULL, 0, rc);
}
rx_queue = &channel->rx_queue;
rx_queue->efx = efx;
- setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
- (unsigned long)rx_queue);
+ timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
return channel;
}
rx_queue = &channel->rx_queue;
rx_queue->buffer = NULL;
memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
- setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
- (unsigned long)rx_queue);
+ timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
return channel;
}
return rc;
}
- /* If Rx VLAN filter is changed, update filters via mac_reconfigure */
- if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ /* If Rx VLAN filter is changed, update filters via mac_reconfigure.
+ * If rx-fcs is changed, mac_reconfigure updates that too.
+ */
+ if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_RXFCS)) {
/* efx_set_rx_mode() will schedule MAC work to update filters
* when a new features are finally set in net_dev.
*/
unsigned long pending;
enum reset_type method;
- pending = ACCESS_ONCE(efx->reset_pending);
+ pending = READ_ONCE(efx->reset_pending);
method = fls(pending) - 1;
if (method == RESET_TYPE_MC_BIST)
/* If we're not READY then just leave the flags set as the cue
* to abort probing or reschedule the reset later.
*/
- if (ACCESS_ONCE(efx->state) != STATE_READY)
+ if (READ_ONCE(efx->state) != STATE_READY)
return;
/* efx_process_channel() will no longer read events once a
/* Determine netdevice features */
net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
- NETIF_F_TSO | NETIF_F_RXCSUM);
+ NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
net_dev->features |= NETIF_F_TSO6;
/* Check whether device supports TSO */
NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
NETIF_F_RXCSUM);
- net_dev->hw_features = net_dev->features & ~efx->fixed_features;
+ net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
+
+ /* Disable receiving frames with bad FCS, by default. */
+ net_dev->features &= ~NETIF_F_RXALL;
/* Disable VLAN filtering by default. It may be enforced if
* the feature is fixed (i.e. VLAN filters are required to
rx_queue = &channel->rx_queue;
rx_queue->efx = efx;
- setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
- (unsigned long)rx_queue);
+ timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0);
return channel;
}
rx_queue = &channel->rx_queue;
rx_queue->buffer = NULL;
memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
- setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
- (unsigned long)rx_queue);
+ timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0);
return channel;
}
unsigned long pending;
enum reset_type method;
- pending = ACCESS_ONCE(efx->reset_pending);
+ pending = READ_ONCE(efx->reset_pending);
method = fls(pending) - 1;
if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
/* If we're not READY then just leave the flags set as the cue
* to abort probing or reschedule the reset later.
*/
- if (ACCESS_ONCE(efx->state) != STATE_READY)
+ if (READ_ONCE(efx->state) != STATE_READY)
return;
queue_work(reset_workqueue, &efx->reset_work);
"IRQ %d on CPU %d status " EF4_OWORD_FMT "\n",
irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker));
- if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
+ if (!likely(READ_ONCE(efx->irq_soft_enabled)))
return IRQ_HANDLED;
/* Check to see if we have a serious error condition */
ef4_oword_t reg;
int link_speed, isolate;
- isolate = !!ACCESS_ONCE(efx->reset_pending);
+ isolate = !!READ_ONCE(efx->reset_pending);
switch (link_state->speed) {
case 10000: link_speed = 3; break;
}
}
- static void falcon_stats_timer_func(unsigned long context)
+ static void falcon_stats_timer_func(struct timer_list *t)
{
- struct ef4_nic *efx = (struct ef4_nic *)context;
- struct falcon_nic_data *nic_data = efx->nic_data;
+ struct falcon_nic_data *nic_data = from_timer(nic_data, t,
+ stats_timer);
+ struct ef4_nic *efx = nic_data->efx;
spin_lock(&efx->stats_lock);
if (!nic_data)
return -ENOMEM;
efx->nic_data = nic_data;
+ nic_data->efx = efx;
rc = -ENODEV;
}
nic_data->stats_disable_count = 1;
- setup_timer(&nic_data->stats_timer, &falcon_stats_timer_func,
- (unsigned long)efx);
+ timer_setup(&nic_data->stats_timer, falcon_stats_timer_func, 0);
return 0;
static inline bool __ef4_nic_tx_is_empty(struct ef4_tx_queue *tx_queue,
unsigned int write_count)
{
- unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count);
+ unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count);
if (empty_read_count == 0)
return false;
/**
* struct falcon_nic_data - Falcon NIC state
* @pci_dev2: Secondary function of Falcon A
+ * @efx: ef4_nic pointer
* @board: Board state and functions
* @stats: Hardware statistics
* @stats_disable_count: Nest count for disabling statistics fetches
*/
struct falcon_nic_data {
struct pci_dev *pci_dev2;
+ struct ef4_nic *efx;
struct falcon_board board;
u64 stats[FALCON_STAT_COUNT];
unsigned int stats_disable_count;
static inline int ef4_nic_event_test_irq_cpu(struct ef4_channel *channel)
{
- return ACCESS_ONCE(channel->event_test_cpu);
+ return READ_ONCE(channel->event_test_cpu);
}
static inline int ef4_nic_irq_test_irq_cpu(struct ef4_nic *efx)
{
- return ACCESS_ONCE(efx->last_irq_cpu);
+ return READ_ONCE(efx->last_irq_cpu);
}
/* Global Resources */
*/
netif_tx_stop_queue(txq1->core_txq);
smp_mb();
- txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
- txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+ txq1->old_read_count = READ_ONCE(txq1->read_count);
+ txq2->old_read_count = READ_ONCE(txq2->read_count);
fill_level = max(txq1->insert_count - txq1->old_read_count,
txq2->insert_count - txq2->old_read_count);
unsigned tc, num_tc;
int rc;
- if (type != TC_SETUP_MQPRIO)
+ if (type != TC_SETUP_QDISC_MQPRIO)
return -EOPNOTSUPP;
num_tc = mqprio->num_tc;
/* Check whether the hardware queue is now empty */
if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
- tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
+ tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
if (tx_queue->read_count == tx_queue->old_write_count) {
smp_mb();
tx_queue->empty_read_count =
struct efx_nic *efx = channel->efx;
int tx_packets = 0;
- if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+ if (unlikely(READ_ONCE(efx->reset_pending)))
return 0;
if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
}
#endif
+ if (efx->net_dev->features & NETIF_F_RXALL)
+ /* don't discard frame for CRC error */
+ rx_ev_eth_crc_err = false;
+
/* The frame must be discarded if any of these are true. */
return (rx_ev_eth_crc_err | rx_ev_frm_trunc |
rx_ev_tobe_disc | rx_ev_pause_frm) ?
struct efx_rx_queue *rx_queue;
struct efx_nic *efx = channel->efx;
- if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+ if (unlikely(READ_ONCE(efx->reset_pending)))
return;
rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id)
{
struct efx_nic *efx = dev_id;
- bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
+ bool soft_enabled = READ_ONCE(efx->irq_soft_enabled);
efx_oword_t *int_ker = efx->irq_status.addr;
irqreturn_t result = IRQ_NONE;
struct efx_channel *channel;
"IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
- if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
+ if (!likely(READ_ONCE(efx->irq_soft_enabled)))
return IRQ_HANDLED;
/* Handle non-event-queue sources */
struct pps_event_time now;
struct timespec64 limit;
struct efx_ptp_data *ptp = efx->ptp_data;
- struct timespec64 start;
int *mc_running = ptp->start.addr;
pps_get_ts(&now);
- start = now.ts_real;
limit = now.ts_real;
timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS);
/* Write host time for specified period or until MC is done */
while ((timespec64_compare(&now.ts_real, &limit) < 0) &&
- ACCESS_ONCE(*mc_running)) {
+ READ_ONCE(*mc_running)) {
struct timespec64 update_time;
unsigned int host_time;
do {
pps_get_ts(&now);
} while ((timespec64_compare(&now.ts_real, &update_time) < 0) &&
- ACCESS_ONCE(*mc_running));
+ READ_ONCE(*mc_running));
/* Synchronise NIC with single word of time only */
host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS |
ptp->start.dma_addr);
/* Clear flag that signals MC ready */
- ACCESS_ONCE(*start) = 0;
+ WRITE_ONCE(*start, 0);
rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf,
MC_CMD_PTP_IN_SYNCHRONIZE_LEN);
EFX_WARN_ON_ONCE_PARANOID(rc);
/* Wait for start from MCDI (or timeout) */
timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS);
- while (!ACCESS_ONCE(*start) && (time_before(jiffies, timeout))) {
+ while (!READ_ONCE(*start) && (time_before(jiffies, timeout))) {
udelay(20); /* Usually start MCDI execution quickly */
loops++;
}
if (!time_before(jiffies, timeout))
++ptp->sync_timeouts;
- if (ACCESS_ONCE(*start))
+ if (READ_ONCE(*start))
efx_ptp_send_times(efx, &last_time);
/* Collect results */
*/
netif_tx_stop_queue(txq1->core_txq);
smp_mb();
- txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
- txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+ txq1->old_read_count = READ_ONCE(txq1->read_count);
+ txq2->old_read_count = READ_ONCE(txq2->read_count);
fill_level = max(txq1->insert_count - txq1->old_read_count,
txq2->insert_count - txq2->old_read_count);
unsigned tc, num_tc;
int rc;
- if (type != TC_SETUP_MQPRIO)
+ if (type != TC_SETUP_QDISC_MQPRIO)
return -EOPNOTSUPP;
num_tc = mqprio->num_tc;
/* Check whether the hardware queue is now empty */
if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
- tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
+ tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
if (tx_queue->read_count == tx_queue->old_write_count) {
smp_mb();
tx_queue->empty_read_count =
return err;
}
- static void niu_timer(unsigned long __opaque)
+ static void niu_timer(struct timer_list *t)
{
- struct niu *np = (struct niu *) __opaque;
+ struct niu *np = from_timer(np, t, timer);
unsigned long off;
int err, link_up;
err = niu_init_hw(np);
if (!err) {
- init_timer(&np->timer);
+ timer_setup(&np->timer, niu_timer, 0);
np->timer.expires = jiffies + HZ;
- np->timer.data = (unsigned long) np;
- np->timer.function = niu_timer;
err = niu_enable_interrupts(np, 1);
if (err)
pkts = dropped = errors = bytes = 0;
- rx_rings = ACCESS_ONCE(np->rx_rings);
+ rx_rings = READ_ONCE(np->rx_rings);
if (!rx_rings)
goto no_rings;
pkts = errors = bytes = 0;
- tx_rings = ACCESS_ONCE(np->tx_rings);
+ tx_rings = READ_ONCE(np->tx_rings);
if (!tx_rings)
goto no_rings;
err = niu_init_hw(np);
if (!err) {
- init_timer(&np->timer);
+ timer_setup(&np->timer, niu_timer, 0);
np->timer.expires = jiffies + HZ;
- np->timer.data = (unsigned long) np;
- np->timer.function = niu_timer;
err = niu_enable_interrupts(np, 1);
if (err)
static struct yam_mcs *yam_data;
-static DEFINE_TIMER(yam_timer, NULL, 0, 0);
+static DEFINE_TIMER(yam_timer, NULL);
/* --------------------------------------------------------------------- */
yam_start_tx(dev, yp);
}
- static void yam_dotimer(unsigned long dummy)
+ static void yam_dotimer(struct timer_list *unused)
{
int i;
}
- yam_timer.function = yam_dotimer;
+ timer_setup(&yam_timer, yam_dotimer, 0);
yam_timer.expires = jiffies + HZ / 100;
add_timer(&yam_timer);
#include <linux/skb_array.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+ #include <linux/mutex.h>
#include <linux/uaccess.h>
#define TUN_VNET_BE 0x40000000
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
- IFF_MULTI_QUEUE)
+ IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
+
#define GOODCOPY_LEN 128
#define FLT_EXACT_COUNT 8
u16 queue_index;
unsigned int ifindex;
};
+ struct napi_struct napi;
+ bool napi_enabled;
+ struct mutex napi_mutex; /* Protects access to the above napi */
struct list_head next;
struct tun_struct *detached;
struct skb_array tx_array;
struct bpf_prog __rcu *xdp_prog;
};
+ static int tun_napi_receive(struct napi_struct *napi, int budget)
+ {
+ struct tun_file *tfile = container_of(napi, struct tun_file, napi);
+ struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+ struct sk_buff_head process_queue;
+ struct sk_buff *skb;
+ int received = 0;
+
+ __skb_queue_head_init(&process_queue);
+
+ spin_lock(&queue->lock);
+ skb_queue_splice_tail_init(queue, &process_queue);
+ spin_unlock(&queue->lock);
+
+ while (received < budget && (skb = __skb_dequeue(&process_queue))) {
+ napi_gro_receive(napi, skb);
+ ++received;
+ }
+
+ if (!skb_queue_empty(&process_queue)) {
+ spin_lock(&queue->lock);
+ skb_queue_splice(&process_queue, queue);
+ spin_unlock(&queue->lock);
+ }
+
+ return received;
+ }
+
+ static int tun_napi_poll(struct napi_struct *napi, int budget)
+ {
+ unsigned int received;
+
+ received = tun_napi_receive(napi, budget);
+
+ if (received < budget)
+ napi_complete_done(napi, received);
+
+ return received;
+ }
+
+ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
+ bool napi_en)
+ {
+ tfile->napi_enabled = napi_en;
+ if (napi_en) {
+ netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&tfile->napi);
+ mutex_init(&tfile->napi_mutex);
+ }
+ }
+
+ static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile)
+ {
+ if (tfile->napi_enabled)
+ napi_disable(&tfile->napi);
+ }
+
+ static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile)
+ {
+ if (tfile->napi_enabled)
+ netif_napi_del(&tfile->napi);
+ }
+
+ static bool tun_napi_frags_enabled(const struct tun_struct *tun)
+ {
+ return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS;
+ }
+
#ifdef CONFIG_TUN_VNET_CROSS_LE
static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
{
tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
- spin_lock_bh(&tun->lock);
+ spin_lock(&tun->lock);
for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
struct tun_flow_entry *e;
struct hlist_node *n;
hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
unsigned long this_timer;
- count++;
+
this_timer = e->updated + delay;
- if (time_before_eq(this_timer, jiffies))
+ if (time_before_eq(this_timer, jiffies)) {
tun_flow_delete(tun, e);
- else if (time_before(this_timer, next_timer))
+ continue;
+ }
+ count++;
+ if (time_before(this_timer, next_timer))
next_timer = this_timer;
}
}
if (count)
mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
- spin_unlock_bh(&tun->lock);
+ spin_unlock(&tun->lock);
}
static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
u32 numqueues = 0;
rcu_read_lock();
- numqueues = ACCESS_ONCE(tun->numqueues);
+ numqueues = READ_ONCE(tun->numqueues);
txq = __skb_get_hash_symmetric(skb);
if (txq) {
tun = rtnl_dereference(tfile->tun);
+ if (tun && clean) {
+ tun_napi_disable(tun, tfile);
+ tun_napi_del(tun, tfile);
+ }
+
if (tun && !tfile->detached) {
u16 index = tfile->queue_index;
BUG_ON(index >= tun->numqueues);
for (i = 0; i < n; i++) {
tfile = rtnl_dereference(tun->tfiles[i]);
BUG_ON(!tfile);
+ tun_napi_disable(tun, tfile);
tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
RCU_INIT_POINTER(tfile->tun, NULL);
synchronize_net();
for (i = 0; i < n; i++) {
tfile = rtnl_dereference(tun->tfiles[i]);
+ tun_napi_del(tun, tfile);
/* Drop read queue */
tun_queue_purge(tfile);
sock_put(&tfile->sk);
module_put(THIS_MODULE);
}
- static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
+ static int tun_attach(struct tun_struct *tun, struct file *file,
+ bool skip_filter, bool napi)
{
struct tun_file *tfile = file->private_data;
struct net_device *dev = tun->dev;
rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
tun->numqueues++;
- if (tfile->detached)
+ if (tfile->detached) {
tun_enable_queue(tfile);
- else
+ } else {
sock_hold(&tfile->sk);
+ tun_napi_init(tun, tfile, napi);
+ }
tun_set_real_num_queues(tun);
return err;
}
- static struct tun_struct *__tun_get(struct tun_file *tfile)
+ static struct tun_struct *tun_get(struct tun_file *tfile)
{
struct tun_struct *tun;
return tun;
}
- static struct tun_struct *tun_get(struct file *file)
- {
- return __tun_get(file->private_data);
- }
-
static void tun_put(struct tun_struct *tun)
{
dev_put(tun->dev);
rcu_read_lock();
tfile = rcu_dereference(tun->tfiles[txq]);
- numqueues = ACCESS_ONCE(tun->numqueues);
+ numqueues = READ_ONCE(tun->numqueues);
/* Drop packet if interface is not attached */
if (txq >= numqueues)
* Tun only receives frames when:
* 1) the char device endpoint gets data from user space
* 2) the tun socket gets a sendmsg call from user space
- * Since both of those are synchronous operations, we are guaranteed
- * never to have pending data when we poll for it
- * so there is nothing to do here but return.
+ * If NAPI is not enabled, since both of those are synchronous
+ * operations, we are guaranteed never to have pending data when we poll
+ * for it so there is nothing to do here but return.
* We need this though so netpoll recognizes us as an interface that
* supports polling, which enables bridge devices in virt setups to
* still use netconsole
+ * If NAPI is enabled, however, we need to schedule polling for all
+ * queues unless we are using napi_gro_frags(), which we call in
+ * process context and not in NAPI context.
*/
+ struct tun_struct *tun = netdev_priv(dev);
+
+ if (tun->flags & IFF_NAPI) {
+ struct tun_file *tfile;
+ int i;
+
+ if (tun_napi_frags_enabled(tun))
+ return;
+
+ rcu_read_lock();
+ for (i = 0; i < tun->numqueues; i++) {
+ tfile = rcu_dereference(tun->tfiles[i]);
+ if (tfile->napi_enabled)
+ napi_schedule(&tfile->napi);
+ }
+ rcu_read_unlock();
+ }
return;
}
#endif
return 0;
}
- static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+ static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
.ndo_features_check = passthru_features_check,
.ndo_set_rx_headroom = tun_set_headroom,
.ndo_get_stats64 = tun_net_get_stats64,
- .ndo_xdp = tun_xdp,
+ .ndo_bpf = tun_xdp,
};
static void tun_flow_init(struct tun_struct *tun)
tun->ageing_time = TUN_FLOW_EXPIRE;
setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
- mod_timer(&tun->flow_gc_timer,
- round_jiffies_up(jiffies + tun->ageing_time));
}
static void tun_flow_uninit(struct tun_struct *tun)
static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
{
struct tun_file *tfile = file->private_data;
- struct tun_struct *tun = __tun_get(tfile);
+ struct tun_struct *tun = tun_get(tfile);
struct sock *sk;
unsigned int mask = 0;
return mask;
}
+ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
+ size_t len,
+ const struct iov_iter *it)
+ {
+ struct sk_buff *skb;
+ size_t linear;
+ int err;
+ int i;
+
+ if (it->nr_segs > MAX_SKB_FRAGS + 1)
+ return ERR_PTR(-ENOMEM);
+
+ local_bh_disable();
+ skb = napi_get_frags(&tfile->napi);
+ local_bh_enable();
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ linear = iov_iter_single_seg_count(it);
+ err = __skb_grow(skb, linear);
+ if (err)
+ goto free;
+
+ skb->len = len;
+ skb->data_len = len - linear;
+ skb->truesize += skb->data_len;
+
+ for (i = 1; i < it->nr_segs; i++) {
+ size_t fragsz = it->iov[i].iov_len;
+ unsigned long offset;
+ struct page *page;
+ void *data;
+
+ if (fragsz == 0 || fragsz > PAGE_SIZE) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ local_bh_disable();
+ data = napi_alloc_frag(fragsz);
+ local_bh_enable();
+ if (!data) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ page = virt_to_head_page(data);
+ offset = data - page_address(page);
+ skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+ }
+
+ return skb;
+ free:
+ /* frees skb and all frags allocated with napi_alloc_frag() */
+ napi_free_frags(&tfile->napi);
+ return ERR_PTR(err);
+ }
+
/* prepad is the amount to reserve at front. len is length after that.
* linear is a hint as to how much to copy (usually headers). */
static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
xdp.data_hard_start = buf;
xdp.data = buf + pad;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
int err;
u32 rxhash;
int skb_xdp = 1;
+ bool frags = tun_napi_frags_enabled(tun);
if (!(tun->dev->flags & IFF_UP))
return -EIO;
zerocopy = true;
}
- if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+ if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
/* For the packet that is not easy to be processed
* (e.g gso or jumbo packet), we will do it at after
* skb was created with generic XDP routine.
linear = tun16_to_cpu(tun, gso.hdr_len);
}
- skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+ if (frags) {
+ mutex_lock(&tfile->napi_mutex);
+ skb = tun_napi_alloc_frags(tfile, copylen, from);
+ /* tun_napi_alloc_frags() enforces a layout for the skb.
+ * If zerocopy is enabled, then this layout will be
+ * overwritten by zerocopy_sg_from_iter().
+ */
+ zerocopy = false;
+ } else {
+ skb = tun_alloc_skb(tfile, align, copylen, linear,
+ noblock);
+ }
+
if (IS_ERR(skb)) {
if (PTR_ERR(skb) != -EAGAIN)
this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ if (frags)
+ mutex_unlock(&tfile->napi_mutex);
return PTR_ERR(skb);
}
if (err) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
kfree_skb(skb);
+ if (frags) {
+ tfile->napi.skb = NULL;
+ mutex_unlock(&tfile->napi_mutex);
+ }
+
return -EFAULT;
}
}
if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
kfree_skb(skb);
+ if (frags) {
+ tfile->napi.skb = NULL;
+ mutex_unlock(&tfile->napi_mutex);
+ }
+
return -EINVAL;
}
skb->dev = tun->dev;
break;
case IFF_TAP:
- skb->protocol = eth_type_trans(skb, tun->dev);
+ if (!frags)
+ skb->protocol = eth_type_trans(skb, tun->dev);
break;
}
}
rxhash = __skb_get_hash_symmetric(skb);
- #ifndef CONFIG_4KSTACKS
- tun_rx_batched(tun, tfile, skb, more);
- #else
- netif_rx_ni(skb);
- #endif
+
+ if (frags) {
+ /* Exercise flow dissector code path. */
+ u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb));
+
+ if (unlikely(headlen > skb_headlen(skb))) {
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ napi_free_frags(&tfile->napi);
+ mutex_unlock(&tfile->napi_mutex);
+ WARN_ON(1);
+ return -ENOMEM;
+ }
+
+ local_bh_disable();
+ napi_gro_frags(&tfile->napi);
+ local_bh_enable();
+ mutex_unlock(&tfile->napi_mutex);
+ } else if (tfile->napi_enabled) {
+ struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+ int queue_len;
+
+ spin_lock_bh(&queue->lock);
+ __skb_queue_tail(queue, skb);
+ queue_len = skb_queue_len(queue);
+ spin_unlock(&queue->lock);
+
+ if (!more || queue_len > NAPI_POLL_WEIGHT)
+ napi_schedule(&tfile->napi);
+
+ local_bh_enable();
+ } else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
+ tun_rx_batched(tun, tfile, skb, more);
+ } else {
+ netif_rx_ni(skb);
+ }
stats = get_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct tun_struct *tun = tun_get(file);
struct tun_file *tfile = file->private_data;
+ struct tun_struct *tun = tun_get(tfile);
ssize_t result;
if (!tun)
{
struct file *file = iocb->ki_filp;
struct tun_file *tfile = file->private_data;
- struct tun_struct *tun = __tun_get(tfile);
+ struct tun_struct *tun = tun_get(tfile);
ssize_t len = iov_iter_count(to), ret;
if (!tun)
{
int ret;
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
- struct tun_struct *tun = __tun_get(tfile);
+ struct tun_struct *tun = tun_get(tfile);
if (!tun)
return -EBADFD;
int flags)
{
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
- struct tun_struct *tun = __tun_get(tfile);
+ struct tun_struct *tun = tun_get(tfile);
int ret;
if (!tun)
struct tun_struct *tun;
int ret = 0;
- tun = __tun_get(tfile);
+ tun = tun_get(tfile);
if (!tun)
return 0;
if (tfile->detached)
return -EINVAL;
+ if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) {
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!(ifr->ifr_flags & IFF_NAPI) ||
+ (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP)
+ return -EINVAL;
+ }
+
dev = __dev_get_by_name(net, ifr->ifr_name);
if (dev) {
if (ifr->ifr_flags & IFF_TUN_EXCL)
if (err < 0)
return err;
- err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER);
+ err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
+ ifr->ifr_flags & IFF_NAPI);
if (err < 0)
return err;
NETIF_F_HW_VLAN_STAG_TX);
INIT_LIST_HEAD(&tun->disabled);
- err = tun_attach(tun, file, false);
+ err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
if (err < 0)
goto err_free_flow;
ret = security_tun_dev_attach_queue(tun->security);
if (ret < 0)
goto unlock;
- ret = tun_attach(tun, file, false);
+ ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI);
} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
tun = rtnl_dereference(tfile->tun);
if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
ret = 0;
rtnl_lock();
- tun = __tun_get(tfile);
+ tun = tun_get(tfile);
if (cmd == TUNSETIFF) {
ret = -EEXIST;
if (tun)
}
#ifdef CONFIG_PROC_FS
- static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f)
+ static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
{
+ struct tun_file *tfile = file->private_data;
struct tun_struct *tun;
struct ifreq ifr;
memset(&ifr, 0, sizeof(ifr));
rtnl_lock();
- tun = tun_get(f);
+ tun = tun_get(tfile);
if (tun)
tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
rtnl_unlock();
static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct nd_msg *msg;
- const struct ipv6hdr *iphdr;
const struct in6_addr *daddr;
- struct neighbour *n;
+ const struct ipv6hdr *iphdr;
struct inet6_dev *in6_dev;
+ struct neighbour *n;
+ struct nd_msg *msg;
in6_dev = __in6_dev_get(dev);
if (!in6_dev)
goto out;
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
- goto out;
-
iphdr = ipv6_hdr(skb);
daddr = &iphdr->daddr;
-
msg = (struct nd_msg *)(iphdr + 1);
- if (msg->icmph.icmp6_code != 0 ||
- msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
- goto out;
if (ipv6_addr_loopback(daddr) ||
ipv6_addr_is_multicast(&msg->target))
static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *rdst, *fdst = NULL;
const struct ip_tunnel_info *info;
- struct ethhdr *eth;
bool did_rsc = false;
- struct vxlan_rdst *rdst, *fdst = NULL;
struct vxlan_fdb *f;
+ struct ethhdr *eth;
__be32 vni = 0;
info = skb_tunnel_info(skb);
if (ntohs(eth->h_proto) == ETH_P_ARP)
return arp_reduce(dev, skb, vni);
#if IS_ENABLED(CONFIG_IPV6)
- else if (ntohs(eth->h_proto) == ETH_P_IPV6) {
- struct ipv6hdr *hdr, _hdr;
- if ((hdr = skb_header_pointer(skb,
- skb_network_offset(skb),
- sizeof(_hdr), &_hdr)) &&
- hdr->nexthdr == IPPROTO_ICMPV6)
+ else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
+ pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+ sizeof(struct nd_msg)) &&
+ ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+ struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1);
+
+ if (m->icmph.icmp6_code == 0 &&
+ m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
return neigh_reduce(dev, skb, vni);
}
#endif
}
/* Walk the forwarding table and purge stale entries */
-static void vxlan_cleanup(unsigned long arg)
+static void vxlan_cleanup(struct timer_list *t)
{
- struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
+ struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer);
unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
unsigned int h;
INIT_LIST_HEAD(&vxlan->next);
spin_lock_init(&vxlan->hash_lock);
- init_timer_deferrable(&vxlan->age_timer);
- vxlan->age_timer.function = vxlan_cleanup;
- vxlan->age_timer.data = (unsigned long) vxlan;
+ timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
vxlan->dev = dev;
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan, *next;
struct net_device *dev, *aux;
+ unsigned int h;
LIST_HEAD(list);
rtnl_lock();
unregister_netdevice_many(&list);
rtnl_unlock();
+
+ for (h = 0; h < PORT_HASH_SIZE; ++h)
+ WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
}
static struct pernet_operations vxlan_net_ops = {
#define I_HMB_HOST_INT I_HMB_SW3 /* Miscellaneous Interrupt */
/* tohostmailboxdata */
- #define HMB_DATA_NAKHANDLED 1 /* retransmit NAK'd frame */
- #define HMB_DATA_DEVREADY 2 /* talk to host after enable */
- #define HMB_DATA_FC 4 /* per prio flowcontrol update flag */
- #define HMB_DATA_FWREADY 8 /* fw ready for protocol activity */
+ #define HMB_DATA_NAKHANDLED 0x0001 /* retransmit NAK'd frame */
+ #define HMB_DATA_DEVREADY 0x0002 /* talk to host after enable */
+ #define HMB_DATA_FC 0x0004 /* per prio flowcontrol update flag */
+ #define HMB_DATA_FWREADY 0x0008 /* fw ready for protocol activity */
+ #define HMB_DATA_FWHALT 0x0010 /* firmware halted */
#define HMB_DATA_FCDATA_MASK 0xff000000
#define HMB_DATA_FCDATA_SHIFT 24
offsetof(struct sdpcmd_regs, tosbmailbox));
bus->sdcnt.f1regdata += 2;
+ /* dongle indicates the firmware has halted/crashed */
+ if (hmb_data & HMB_DATA_FWHALT)
+ brcmf_err("mailbox indicates firmware halted\n");
+
/* Dongle recomposed rx frames, accept them again */
if (hmb_data & HMB_DATA_NAKHANDLED) {
brcmf_dbg(SDIO, "Dongle reports NAK handled, expect rtx of %d\n",
HMB_DATA_NAKHANDLED |
HMB_DATA_FC |
HMB_DATA_FWREADY |
+ HMB_DATA_FWHALT |
HMB_DATA_FCDATA_MASK | HMB_DATA_VERSION_MASK))
brcmf_err("Unknown mailbox data content: 0x%02x\n",
hmb_data);
bus->dpc_running = true;
wmb();
- while (ACCESS_ONCE(bus->dpc_triggered)) {
+ while (READ_ONCE(bus->dpc_triggered)) {
bus->dpc_triggered = false;
brcmf_sdio_dpc(bus);
bus->idlecount = 0;
}
}
+ static int brcmf_sdio_get_fwname(struct device *dev, u32 chip, u32 chiprev,
+ u8 *fw_name)
+ {
+ struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+ struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
+ int ret = 0;
+
+ if (sdiodev->fw_name[0] != '\0')
+ strlcpy(fw_name, sdiodev->fw_name, BRCMF_FW_NAME_LEN);
+ else
+ ret = brcmf_fw_map_chip_to_name(chip, chiprev,
+ brcmf_sdio_fwnames,
+ ARRAY_SIZE(brcmf_sdio_fwnames),
+ fw_name, NULL);
+
+ return ret;
+ }
+
static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
.stop = brcmf_sdio_bus_stop,
.preinit = brcmf_sdio_bus_preinit,
.wowl_config = brcmf_sdio_wowl_config,
.get_ramsize = brcmf_sdio_bus_get_ramsize,
.get_memdump = brcmf_sdio_bus_get_memdump,
+ .get_fwname = brcmf_sdio_get_fwname,
};
static void brcmf_sdio_firmware_callback(struct device *dev, int err,
init_waitqueue_head(&bus->dcmd_resp_wait);
/* Set up the watchdog timer */
- init_timer(&bus->timer);
- bus->timer.data = (unsigned long)bus;
- bus->timer.function = brcmf_sdio_watchdog;
-
+ setup_timer(&bus->timer, brcmf_sdio_watchdog,
+ (unsigned long)bus);
/* Initialize watchdog thread */
init_completion(&bus->watchdog_wait);
bus->watchdog_tsk = kthread_run(brcmf_sdio_watchdog_thread,
#include "time-event.h"
#include "fw-api.h"
#include "fw/api/scan.h"
+ #include "fw/acpi.h"
#define DRV_DESCRIPTION "The new Intel(R) wireless AGN driver for Linux"
MODULE_DESCRIPTION(DRV_DESCRIPTION);
* Access is done through binary search
*/
static const struct iwl_hcmd_names iwl_mvm_mac_conf_names[] = {
- HCMD_NAME(LINK_QUALITY_MEASUREMENT_CMD),
- HCMD_NAME(LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF),
HCMD_NAME(CHANNEL_SWITCH_NOA_NOTIF),
};
static void iwl_mvm_async_handlers_wk(struct work_struct *wk);
static void iwl_mvm_d0i3_exit_work(struct work_struct *wk);
- static u32 calc_min_backoff(struct iwl_trans *trans, const struct iwl_cfg *cfg)
+ static u32 iwl_mvm_min_backoff(struct iwl_mvm *mvm)
{
- const struct iwl_pwr_tx_backoff *pwr_tx_backoff = cfg->pwr_tx_backoffs;
+ const struct iwl_pwr_tx_backoff *backoff = mvm->cfg->pwr_tx_backoffs;
+ u64 dflt_pwr_limit;
- if (!pwr_tx_backoff)
+ if (!backoff)
return 0;
- while (pwr_tx_backoff->pwr) {
- if (trans->dflt_pwr_limit >= pwr_tx_backoff->pwr)
- return pwr_tx_backoff->backoff;
+ dflt_pwr_limit = iwl_acpi_get_pwr_limit(mvm->dev);
- pwr_tx_backoff++;
+ while (backoff->pwr) {
+ if (dflt_pwr_limit >= backoff->pwr)
+ return backoff->backoff;
+
+ backoff++;
}
return 0;
trans_cfg.cb_data_offs = offsetof(struct ieee80211_tx_info,
driver_data[2]);
- trans_cfg.sdio_adma_addr = fw->sdio_adma_addr;
trans_cfg.sw_csum_tx = IWL_MVM_SW_TX_CSUM_OFFLOAD;
/* Set a short watchdog for the command queue */
goto out_free;
mvm->hw_registered = true;
- min_backoff = calc_min_backoff(trans, cfg);
+ min_backoff = iwl_mvm_min_backoff(mvm);
iwl_mvm_thermal_initialize(mvm, min_backoff);
err = iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state)
{
struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
- bool calibrating = ACCESS_ONCE(mvm->calibrating);
+ bool calibrating = READ_ONCE(mvm->calibrating);
if (state)
set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
return -1;
} else if (info.control.vif->type == NL80211_IFTYPE_STATION &&
is_multicast_ether_addr(hdr->addr1)) {
- u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id);
+ u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id);
if (ap_sta_id != IWL_MVM_INVALID_STA)
sta_id = ap_sta_id;
snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) +
tcp_hdrlen(skb);
- dbg_max_amsdu_len = ACCESS_ONCE(mvm->max_amsdu_len);
+ dbg_max_amsdu_len = READ_ONCE(mvm->max_amsdu_len);
if (!sta->max_amsdu_len ||
!ieee80211_is_data_qos(hdr->frame_control) ||
mvmsta->tid_data[tid].tx_time =
le16_to_cpu(tx_resp->wireless_media_time);
mvmsta->tid_data[tid].lq_color =
- (tx_resp->tlc_info & TX_RES_RATE_TABLE_COLOR_MSK) >>
- TX_RES_RATE_TABLE_COLOR_POS;
+ TX_RES_RATE_TABLE_COL_GET(tx_resp->tlc_info);
}
rcu_read_unlock();
if (iwl_mvm_has_new_tx_api(mvm)) {
struct iwl_mvm_compressed_ba_notif *ba_res =
(void *)pkt->data;
+ u8 lq_color = TX_RES_RATE_TABLE_COL_GET(ba_res->tlc_rate_info);
int i;
sta_id = ba_res->sta_id;
if (!le16_to_cpu(ba_res->tfd_cnt))
goto out;
+ rcu_read_lock();
+
+ mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id);
+ if (!mvmsta)
+ goto out_unlock;
+
/* Free per TID */
for (i = 0; i < le16_to_cpu(ba_res->tfd_cnt); i++) {
struct iwl_mvm_compressed_ba_tfd *ba_tfd =
&ba_res->tfd[i];
+ mvmsta->tid_data[i].lq_color = lq_color;
iwl_mvm_tx_reclaim(mvm, sta_id, ba_tfd->tid,
(int)(le16_to_cpu(ba_tfd->q_num)),
le16_to_cpu(ba_tfd->tfd_index),
le32_to_cpu(ba_res->tx_rate));
}
+ out_unlock:
+ rcu_read_unlock();
out:
IWL_DEBUG_TX_REPLY(mvm,
"BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n",
#define IWL_FW_MEM_EXTENDED_START 0x40000
#define IWL_FW_MEM_EXTENDED_END 0x57FFF
+ static void iwl_trans_pcie_dump_regs(struct iwl_trans *trans)
+ {
+ #define PCI_DUMP_SIZE 64
+ #define PREFIX_LEN 32
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ struct pci_dev *pdev = trans_pcie->pci_dev;
+ u32 i, pos, alloc_size, *ptr, *buf;
+ char *prefix;
+
+ if (trans_pcie->pcie_dbg_dumped_once)
+ return;
+
+ /* Should be a multiple of 4 */
+ BUILD_BUG_ON(PCI_DUMP_SIZE > 4096 || PCI_DUMP_SIZE & 0x3);
+ /* Alloc a max size buffer */
+ if (PCI_ERR_ROOT_ERR_SRC + 4 > PCI_DUMP_SIZE)
+ alloc_size = PCI_ERR_ROOT_ERR_SRC + 4 + PREFIX_LEN;
+ else
+ alloc_size = PCI_DUMP_SIZE + PREFIX_LEN;
+ buf = kmalloc(alloc_size, GFP_ATOMIC);
+ if (!buf)
+ return;
+ prefix = (char *)buf + alloc_size - PREFIX_LEN;
+
+ IWL_ERR(trans, "iwlwifi transaction failed, dumping registers\n");
+
+ /* Print wifi device registers */
+ sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+ IWL_ERR(trans, "iwlwifi device config registers:\n");
+ for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+ if (pci_read_config_dword(pdev, i, ptr))
+ goto err_read;
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+
+ IWL_ERR(trans, "iwlwifi device memory mapped registers:\n");
+ for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+ *ptr = iwl_read32(trans, i);
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+ if (pos) {
+ IWL_ERR(trans, "iwlwifi device AER capability structure:\n");
+ for (i = 0, ptr = buf; i < PCI_ERR_ROOT_COMMAND; i += 4, ptr++)
+ if (pci_read_config_dword(pdev, pos + i, ptr))
+ goto err_read;
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
+ 32, 4, buf, i, 0);
+ }
+
+ /* Print parent device registers next */
+ if (!pdev->bus->self)
+ goto out;
+
+ pdev = pdev->bus->self;
+ sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+
+ IWL_ERR(trans, "iwlwifi parent port (%s) config registers:\n",
+ pci_name(pdev));
+ for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+ if (pci_read_config_dword(pdev, i, ptr))
+ goto err_read;
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+
+ /* Print root port AER registers */
+ pos = 0;
+ pdev = pcie_find_root_port(pdev);
+ if (pdev)
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+ if (pos) {
+ IWL_ERR(trans, "iwlwifi root port (%s) AER cap structure:\n",
+ pci_name(pdev));
+ sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+ for (i = 0, ptr = buf; i <= PCI_ERR_ROOT_ERR_SRC; i += 4, ptr++)
+ if (pci_read_config_dword(pdev, pos + i, ptr))
+ goto err_read;
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32,
+ 4, buf, i, 0);
+ }
+
+ err_read:
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+ IWL_ERR(trans, "Read failed at 0x%X\n", i);
+ out:
+ trans_pcie->pcie_dbg_dumped_once = 1;
+ kfree(buf);
+ }
+
static void iwl_pcie_free_fw_monitor(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
trans_pcie->ucode_write_complete, 5 * HZ);
if (!ret) {
IWL_ERR(trans, "Failed to load firmware chunk!\n");
+ iwl_trans_pcie_dump_regs(trans);
return -ETIMEDOUT;
}
(CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
if (unlikely(ret < 0)) {
+ iwl_trans_pcie_dump_regs(trans);
iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);
WARN_ONCE(1,
"Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", txq_idx);
txq = trans_pcie->txq[txq_idx];
- wr_ptr = ACCESS_ONCE(txq->write_ptr);
+ wr_ptr = READ_ONCE(txq->write_ptr);
- while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) &&
+ while (txq->read_ptr != READ_ONCE(txq->write_ptr) &&
!time_after(jiffies,
now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) {
- u8 write_ptr = ACCESS_ONCE(txq->write_ptr);
+ u8 write_ptr = READ_ONCE(txq->write_ptr);
if (WARN_ONCE(wr_ptr != write_ptr,
"WR pointer moved while flushing %d -> %d\n",
spin_lock(&rxq->lock);
- r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF;
+ r = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF;
for (i = rxq->read, j = 0;
i != r && j < allocated_rb_nums;
/* Dump RBs is supported only for pre-9000 devices (1 queue) */
struct iwl_rxq *rxq = &trans_pcie->rxq[0];
/* RBs */
- num_rbs = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num))
+ num_rbs = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num))
& 0x0FFF;
num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK;
len += num_rbs * (sizeof(*data) +
.ref = iwl_trans_pcie_ref, \
.unref = iwl_trans_pcie_unref, \
.dump_data = iwl_trans_pcie_dump_data, \
+ .dump_regs = iwl_trans_pcie_dump_regs, \
.d3_suspend = iwl_trans_pcie_d3_suspend, \
.d3_resume = iwl_trans_pcie_d3_resume
if (!tb[QCA_WLAN_VENDOR_ATTR_TEST])
return -EINVAL;
val = nla_get_u32(tb[QCA_WLAN_VENDOR_ATTR_TEST]);
- wiphy_debug(wiphy, "%s: test=%u\n", __func__, val);
+ wiphy_dbg(wiphy, "%s: test=%u\n", __func__, val);
/* Send a vendor event as a test. Note that this would not normally be
* done within a command handler, but rather, based on some other
if (!vp->assoc)
return;
- wiphy_debug(data->hw->wiphy,
- "%s: send PS-Poll to %pM for aid %d\n",
- __func__, vp->bssid, vp->aid);
+ wiphy_dbg(data->hw->wiphy,
+ "%s: send PS-Poll to %pM for aid %d\n",
+ __func__, vp->bssid, vp->aid);
skb = dev_alloc_skb(sizeof(*pspoll));
if (!skb)
if (!vp->assoc)
return;
- wiphy_debug(data->hw->wiphy,
- "%s: send data::nullfunc to %pM ps=%d\n",
- __func__, vp->bssid, ps);
+ wiphy_dbg(data->hw->wiphy,
+ "%s: send data::nullfunc to %pM ps=%d\n",
+ __func__, vp->bssid, ps);
skb = dev_alloc_skb(sizeof(*hdr));
if (!skb)
msg_head = genlmsg_put(skb, 0, 0, &hwsim_genl_family, 0,
HWSIM_CMD_FRAME);
if (msg_head == NULL) {
- printk(KERN_DEBUG "mac80211_hwsim: problem with msg_head\n");
+ pr_debug("mac80211_hwsim: problem with msg_head\n");
goto nla_put_failure;
}
nla_put_failure:
nlmsg_free(skb);
err_free_txskb:
- printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+ pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
ieee80211_free_txskb(hw, my_skb);
data->tx_failed++;
}
}
if (data->idle && !data->tmp_chan) {
- wiphy_debug(hw->wiphy, "Trying to TX when idle - reject\n");
+ wiphy_dbg(hw->wiphy, "Trying to TX when idle - reject\n");
ieee80211_free_txskb(hw, skb);
return;
}
mac80211_hwsim_monitor_rx(hw, skb, channel);
/* wmediumd mode check */
- _portid = ACCESS_ONCE(data->wmediumd);
+ _portid = READ_ONCE(data->wmediumd);
if (_portid)
return mac80211_hwsim_tx_frame_nl(hw, skb, _portid);
static int mac80211_hwsim_start(struct ieee80211_hw *hw)
{
struct mac80211_hwsim_data *data = hw->priv;
- wiphy_debug(hw->wiphy, "%s\n", __func__);
+ wiphy_dbg(hw->wiphy, "%s\n", __func__);
data->started = true;
return 0;
}
struct mac80211_hwsim_data *data = hw->priv;
data->started = false;
tasklet_hrtimer_cancel(&data->beacon_timer);
- wiphy_debug(hw->wiphy, "%s\n", __func__);
+ wiphy_dbg(hw->wiphy, "%s\n", __func__);
}
static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
- wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
- __func__, ieee80211_vif_type_p2p(vif),
- vif->addr);
+ wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
+ __func__, ieee80211_vif_type_p2p(vif),
+ vif->addr);
hwsim_set_magic(vif);
vif->cab_queue = 0;
bool newp2p)
{
newtype = ieee80211_iftype_p2p(newtype, newp2p);
- wiphy_debug(hw->wiphy,
- "%s (old type=%d, new type=%d, mac_addr=%pM)\n",
- __func__, ieee80211_vif_type_p2p(vif),
+ wiphy_dbg(hw->wiphy,
+ "%s (old type=%d, new type=%d, mac_addr=%pM)\n",
+ __func__, ieee80211_vif_type_p2p(vif),
newtype, vif->addr);
hwsim_check_magic(vif);
static void mac80211_hwsim_remove_interface(
struct ieee80211_hw *hw, struct ieee80211_vif *vif)
{
- wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
- __func__, ieee80211_vif_type_p2p(vif),
- vif->addr);
+ wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
+ __func__, ieee80211_vif_type_p2p(vif),
+ vif->addr);
hwsim_check_magic(vif);
hwsim_clear_magic(vif);
}
struct ieee80211_channel *chan)
{
struct mac80211_hwsim_data *data = hw->priv;
- u32 _pid = ACCESS_ONCE(data->wmediumd);
+ u32 _pid = READ_ONCE(data->wmediumd);
if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) {
struct ieee80211_tx_info *txi = IEEE80211_SKB_CB(skb);
int idx;
if (conf->chandef.chan)
- wiphy_debug(hw->wiphy,
- "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n",
- __func__,
- conf->chandef.chan->center_freq,
- conf->chandef.center_freq1,
- conf->chandef.center_freq2,
- hwsim_chanwidths[conf->chandef.width],
- !!(conf->flags & IEEE80211_CONF_IDLE),
- !!(conf->flags & IEEE80211_CONF_PS),
- smps_modes[conf->smps_mode]);
+ wiphy_dbg(hw->wiphy,
+ "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n",
+ __func__,
+ conf->chandef.chan->center_freq,
+ conf->chandef.center_freq1,
+ conf->chandef.center_freq2,
+ hwsim_chanwidths[conf->chandef.width],
+ !!(conf->flags & IEEE80211_CONF_IDLE),
+ !!(conf->flags & IEEE80211_CONF_PS),
+ smps_modes[conf->smps_mode]);
else
- wiphy_debug(hw->wiphy,
- "%s (freq=0 idle=%d ps=%d smps=%s)\n",
- __func__,
- !!(conf->flags & IEEE80211_CONF_IDLE),
- !!(conf->flags & IEEE80211_CONF_PS),
- smps_modes[conf->smps_mode]);
+ wiphy_dbg(hw->wiphy,
+ "%s (freq=0 idle=%d ps=%d smps=%s)\n",
+ __func__,
+ !!(conf->flags & IEEE80211_CONF_IDLE),
+ !!(conf->flags & IEEE80211_CONF_PS),
+ smps_modes[conf->smps_mode]);
data->idle = !!(conf->flags & IEEE80211_CONF_IDLE);
{
struct mac80211_hwsim_data *data = hw->priv;
- wiphy_debug(hw->wiphy, "%s\n", __func__);
+ wiphy_dbg(hw->wiphy, "%s\n", __func__);
data->rx_filter = 0;
if (*total_flags & FIF_ALLMULTI)
hwsim_check_magic(vif);
- wiphy_debug(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n",
- __func__, changed, vif->addr);
+ wiphy_dbg(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n",
+ __func__, changed, vif->addr);
if (changed & BSS_CHANGED_BSSID) {
- wiphy_debug(hw->wiphy, "%s: BSSID changed: %pM\n",
- __func__, info->bssid);
+ wiphy_dbg(hw->wiphy, "%s: BSSID changed: %pM\n",
+ __func__, info->bssid);
memcpy(vp->bssid, info->bssid, ETH_ALEN);
}
if (changed & BSS_CHANGED_ASSOC) {
- wiphy_debug(hw->wiphy, " ASSOC: assoc=%d aid=%d\n",
- info->assoc, info->aid);
+ wiphy_dbg(hw->wiphy, " ASSOC: assoc=%d aid=%d\n",
+ info->assoc, info->aid);
vp->assoc = info->assoc;
vp->aid = info->aid;
}
if (changed & BSS_CHANGED_BEACON_ENABLED) {
- wiphy_debug(hw->wiphy, " BCN EN: %d (BI=%u)\n",
- info->enable_beacon, info->beacon_int);
+ wiphy_dbg(hw->wiphy, " BCN EN: %d (BI=%u)\n",
+ info->enable_beacon, info->beacon_int);
vp->bcn_en = info->enable_beacon;
if (data->started &&
!hrtimer_is_queued(&data->beacon_timer.timer) &&
ieee80211_iterate_active_interfaces_atomic(
data->hw, IEEE80211_IFACE_ITER_NORMAL,
mac80211_hwsim_bcn_en_iter, &count);
- wiphy_debug(hw->wiphy, " beaconing vifs remaining: %u",
- count);
+ wiphy_dbg(hw->wiphy, " beaconing vifs remaining: %u",
+ count);
if (count == 0) {
tasklet_hrtimer_cancel(&data->beacon_timer);
data->beacon_int = 0;
}
if (changed & BSS_CHANGED_ERP_CTS_PROT) {
- wiphy_debug(hw->wiphy, " ERP_CTS_PROT: %d\n",
- info->use_cts_prot);
+ wiphy_dbg(hw->wiphy, " ERP_CTS_PROT: %d\n",
+ info->use_cts_prot);
}
if (changed & BSS_CHANGED_ERP_PREAMBLE) {
- wiphy_debug(hw->wiphy, " ERP_PREAMBLE: %d\n",
- info->use_short_preamble);
+ wiphy_dbg(hw->wiphy, " ERP_PREAMBLE: %d\n",
+ info->use_short_preamble);
}
if (changed & BSS_CHANGED_ERP_SLOT) {
- wiphy_debug(hw->wiphy, " ERP_SLOT: %d\n", info->use_short_slot);
+ wiphy_dbg(hw->wiphy, " ERP_SLOT: %d\n", info->use_short_slot);
}
if (changed & BSS_CHANGED_HT) {
- wiphy_debug(hw->wiphy, " HT: op_mode=0x%x\n",
- info->ht_operation_mode);
+ wiphy_dbg(hw->wiphy, " HT: op_mode=0x%x\n",
+ info->ht_operation_mode);
}
if (changed & BSS_CHANGED_BASIC_RATES) {
- wiphy_debug(hw->wiphy, " BASIC_RATES: 0x%llx\n",
- (unsigned long long) info->basic_rates);
+ wiphy_dbg(hw->wiphy, " BASIC_RATES: 0x%llx\n",
+ (unsigned long long) info->basic_rates);
}
if (changed & BSS_CHANGED_TXPOWER)
- wiphy_debug(hw->wiphy, " TX Power: %d dBm\n", info->txpower);
+ wiphy_dbg(hw->wiphy, " TX Power: %d dBm\n", info->txpower);
}
static int mac80211_hwsim_sta_add(struct ieee80211_hw *hw,
struct ieee80211_vif *vif, u16 queue,
const struct ieee80211_tx_queue_params *params)
{
- wiphy_debug(hw->wiphy,
- "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n",
- __func__, queue,
- params->txop, params->cw_min,
- params->cw_max, params->aifs);
+ wiphy_dbg(hw->wiphy,
+ "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n",
+ __func__, queue,
+ params->txop, params->cw_min,
+ params->cw_max, params->aifs);
return 0;
}
.aborted = false,
};
- wiphy_debug(hwsim->hw->wiphy, "hw scan complete\n");
+ wiphy_dbg(hwsim->hw->wiphy, "hw scan complete\n");
ieee80211_scan_completed(hwsim->hw, &info);
hwsim->hw_scan_request = NULL;
hwsim->hw_scan_vif = NULL;
return;
}
- wiphy_debug(hwsim->hw->wiphy, "hw scan %d MHz\n",
- req->channels[hwsim->scan_chan_idx]->center_freq);
+ wiphy_dbg(hwsim->hw->wiphy, "hw scan %d MHz\n",
+ req->channels[hwsim->scan_chan_idx]->center_freq);
hwsim->tmp_chan = req->channels[hwsim->scan_chan_idx];
if (hwsim->tmp_chan->flags & (IEEE80211_CHAN_NO_IR |
memset(hwsim->survey_data, 0, sizeof(hwsim->survey_data));
mutex_unlock(&hwsim->mutex);
- wiphy_debug(hw->wiphy, "hwsim hw_scan request\n");
+ wiphy_dbg(hw->wiphy, "hwsim hw_scan request\n");
ieee80211_queue_delayed_work(hwsim->hw, &hwsim->hw_scan, 0);
.aborted = true,
};
- wiphy_debug(hw->wiphy, "hwsim cancel_hw_scan\n");
+ wiphy_dbg(hw->wiphy, "hwsim cancel_hw_scan\n");
cancel_delayed_work_sync(&hwsim->hw_scan);
mutex_lock(&hwsim->mutex);
if (hwsim->scanning) {
- printk(KERN_DEBUG "two hwsim sw_scans detected!\n");
+ pr_debug("two hwsim sw_scans detected!\n");
goto out;
}
- printk(KERN_DEBUG "hwsim sw_scan request, prepping stuff\n");
+ pr_debug("hwsim sw_scan request, prepping stuff\n");
memcpy(hwsim->scan_addr, mac_addr, ETH_ALEN);
hwsim->scanning = true;
mutex_lock(&hwsim->mutex);
- printk(KERN_DEBUG "hwsim sw_scan_complete\n");
+ pr_debug("hwsim sw_scan_complete\n");
hwsim->scanning = false;
eth_zero_addr(hwsim->scan_addr);
mutex_lock(&hwsim->mutex);
- wiphy_debug(hwsim->hw->wiphy, "hwsim ROC begins\n");
+ wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC begins\n");
hwsim->tmp_chan = hwsim->roc_chan;
ieee80211_ready_on_channel(hwsim->hw);
hwsim->tmp_chan = NULL;
mutex_unlock(&hwsim->mutex);
- wiphy_debug(hwsim->hw->wiphy, "hwsim ROC expired\n");
+ wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC expired\n");
}
static int mac80211_hwsim_roc(struct ieee80211_hw *hw,
hwsim->roc_duration = duration;
mutex_unlock(&hwsim->mutex);
- wiphy_debug(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n",
- chan->center_freq, duration);
+ wiphy_dbg(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n",
+ chan->center_freq, duration);
ieee80211_queue_delayed_work(hw, &hwsim->roc_start, HZ/50);
return 0;
hwsim->tmp_chan = NULL;
mutex_unlock(&hwsim->mutex);
- wiphy_debug(hw->wiphy, "hwsim ROC canceled\n");
+ wiphy_dbg(hw->wiphy, "hwsim ROC canceled\n");
return 0;
}
struct ieee80211_chanctx_conf *ctx)
{
hwsim_set_chanctx_magic(ctx);
- wiphy_debug(hw->wiphy,
- "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
- ctx->def.chan->center_freq, ctx->def.width,
- ctx->def.center_freq1, ctx->def.center_freq2);
+ wiphy_dbg(hw->wiphy,
+ "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+ ctx->def.chan->center_freq, ctx->def.width,
+ ctx->def.center_freq1, ctx->def.center_freq2);
return 0;
}
static void mac80211_hwsim_remove_chanctx(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx)
{
- wiphy_debug(hw->wiphy,
- "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
- ctx->def.chan->center_freq, ctx->def.width,
- ctx->def.center_freq1, ctx->def.center_freq2);
+ wiphy_dbg(hw->wiphy,
+ "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+ ctx->def.chan->center_freq, ctx->def.width,
+ ctx->def.center_freq1, ctx->def.center_freq2);
hwsim_check_chanctx_magic(ctx);
hwsim_clear_chanctx_magic(ctx);
}
u32 changed)
{
hwsim_check_chanctx_magic(ctx);
- wiphy_debug(hw->wiphy,
- "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
- ctx->def.chan->center_freq, ctx->def.width,
- ctx->def.center_freq1, ctx->def.center_freq2);
+ wiphy_dbg(hw->wiphy,
+ "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+ ctx->def.chan->center_freq, ctx->def.width,
+ ctx->def.center_freq1, ctx->def.center_freq2);
}
static int mac80211_hwsim_assign_vif_chanctx(struct ieee80211_hw *hw,
ops = &mac80211_hwsim_mchan_ops;
hw = ieee80211_alloc_hw_nm(sizeof(*data), ops, param->hwname);
if (!hw) {
- printk(KERN_DEBUG "mac80211_hwsim: ieee80211_alloc_hw failed\n");
+ pr_debug("mac80211_hwsim: ieee80211_alloc_hw failed\n");
err = -ENOMEM;
goto failed;
}
data->dev->driver = &mac80211_hwsim_driver.driver;
err = device_bind_driver(data->dev);
if (err != 0) {
- printk(KERN_DEBUG "mac80211_hwsim: device_bind_driver failed (%d)\n",
+ pr_debug("mac80211_hwsim: device_bind_driver failed (%d)\n",
err);
goto failed_bind;
}
err = ieee80211_register_hw(hw);
if (err < 0) {
- printk(KERN_DEBUG "mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
+ pr_debug("mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
err);
goto failed_hw;
}
- wiphy_debug(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr);
+ wiphy_dbg(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr);
if (param->reg_alpha2) {
data->alpha2[0] = param->reg_alpha2[0];
return 0;
err:
- printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+ pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
out:
dev_kfree_skb(skb);
return -EINVAL;
hwsim_register_wmediumd(net, info->snd_portid);
- printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, "
+ pr_debug("mac80211_hwsim: received a REGISTER, "
"switching to wmediumd mode with pid %d\n", info->snd_portid);
return 0;
return 0;
failure:
- printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+ pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
return -EINVAL;
}
static void __exit exit_mac80211_hwsim(void)
{
- printk(KERN_DEBUG "mac80211_hwsim: unregister radios\n");
+ pr_debug("mac80211_hwsim: unregister radios\n");
hwsim_exit_netlink();
static struct kmem_cache *qeth_qdio_outbuf_cache;
static struct device *qeth_core_root_dev;
- static unsigned int known_devices[][6] = QETH_MODELLIST_ARRAY;
static struct lock_class_key qdio_out_skb_queue_key;
static struct mutex qeth_mod_mutex;
QETH_DBF_TEXT(SETUP, 4, "intqdinf");
atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED);
/* inbound */
+ card->qdio.no_in_queues = 1;
card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT;
if (card->info.type == QETH_CARD_TYPE_IQD)
card->qdio.init_pool.buf_count = QETH_IN_BUF_COUNT_HSDEFAULT;
return NULL;
}
- static int qeth_determine_card_type(struct qeth_card *card)
+ static void qeth_determine_card_type(struct qeth_card *card)
{
- int i = 0;
-
QETH_DBF_TEXT(SETUP, 2, "detcdtyp");
card->qdio.do_prio_queueing = QETH_PRIOQ_DEFAULT;
card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
- while (known_devices[i][QETH_DEV_MODEL_IND]) {
- if ((CARD_RDEV(card)->id.dev_type ==
- known_devices[i][QETH_DEV_TYPE_IND]) &&
- (CARD_RDEV(card)->id.dev_model ==
- known_devices[i][QETH_DEV_MODEL_IND])) {
- card->info.type = known_devices[i][QETH_DEV_MODEL_IND];
- card->qdio.no_out_queues =
- known_devices[i][QETH_QUEUE_NO_IND];
- card->qdio.no_in_queues = 1;
- card->info.is_multicast_different =
- known_devices[i][QETH_MULTICAST_IND];
- qeth_update_from_chp_desc(card);
- return 0;
- }
- i++;
- }
- card->info.type = QETH_CARD_TYPE_UNKNOWN;
- dev_err(&card->gdev->dev, "The adapter hardware is of an "
- "unknown type\n");
- return -ENOENT;
+ card->info.type = CARD_RDEV(card)->id.driver_info;
+ card->qdio.no_out_queues = QETH_MAX_QUEUES;
+ if (card->info.type == QETH_CARD_TYPE_IQD)
+ card->info.is_multicast_different = 0x0103;
+ qeth_update_from_chp_desc(card);
}
static int qeth_clear_channel(struct qeth_channel *channel)
spin_lock_irqsave(&card->lock, flags);
list_add_tail(&reply->list, &card->cmd_waiter_list);
spin_unlock_irqrestore(&card->lock, flags);
- QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN);
while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ;
qeth_prepare_control_data(card, len, iob);
static int qeth_get_initial_mtu_for_card(struct qeth_card *card)
{
switch (card->info.type) {
- case QETH_CARD_TYPE_UNKNOWN:
- return 1500;
case QETH_CARD_TYPE_IQD:
return card->info.max_mtu;
case QETH_CARD_TYPE_OSD:
- switch (card->info.link_type) {
- case QETH_LINK_TYPE_HSTR:
- case QETH_LINK_TYPE_LANE_TR:
- return 2000;
- default:
- return card->options.layer2 ? 1500 : 1492;
- }
- case QETH_CARD_TYPE_OSM:
case QETH_CARD_TYPE_OSX:
- return card->options.layer2 ? 1500 : 1492;
+ if (!card->options.layer2)
+ return ETH_DATA_LEN - 8; /* L3: allow for LLC + SNAP */
+ /* fall through */
default:
- return 1500;
+ return ETH_DATA_LEN;
}
}
return ((mtu >= 576) &&
(mtu <= card->info.max_mtu));
case QETH_CARD_TYPE_OSN:
- case QETH_CARD_TYPE_UNKNOWN:
default:
return 1;
}
return flush_cnt;
}
- int qeth_do_send_packet_fast(struct qeth_card *card,
- struct qeth_qdio_out_q *queue, struct sk_buff *skb,
+ int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb,
struct qeth_hdr *hdr, unsigned int offset,
unsigned int hd_len)
{
- struct qeth_qdio_out_buffer *buffer;
- int index;
+ int index = queue->next_buf_to_fill;
+ struct qeth_qdio_out_buffer *buffer = queue->bufs[index];
- /* spin until we get the queue ... */
- while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
- QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
- /* ... now we've got the queue */
- index = queue->next_buf_to_fill;
- buffer = queue->bufs[queue->next_buf_to_fill];
/*
* check if buffer is empty to make sure that we do not 'overtake'
* ourselves and try to fill a buffer that is already primed
*/
if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
- goto out;
- queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) %
- QDIO_MAX_BUFFERS_PER_Q;
- atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
+ return -EBUSY;
+ queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q;
qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len);
qeth_flush_buffers(queue, index, 1);
return 0;
- out:
- atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
- return -EBUSY;
}
EXPORT_SYMBOL_GPL(qeth_do_send_packet_fast);
if (card->options.cq == QETH_CQ_ENABLED) {
int offset = QDIO_MAX_BUFFERS_PER_Q *
(card->qdio.no_in_queues - 1);
- i = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1);
for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
in_sbal_ptrs[offset + i] = (struct qdio_buffer *)
virt_to_phys(card->qdio.c_q->bufs[i].buffer);
}
EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
- static int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer,
- struct qdio_buffer_element *element,
- struct sk_buff **pskb, int offset, int *pfrag,
- int data_len)
+ static void qeth_create_skb_frag(struct qdio_buffer_element *element,
+ struct sk_buff *skb, int offset, int data_len)
{
struct page *page = virt_to_page(element->addr);
- if (*pskb == NULL) {
- if (qethbuffer->rx_skb) {
- /* only if qeth_card.options.cq == QETH_CQ_ENABLED */
- *pskb = qethbuffer->rx_skb;
- qethbuffer->rx_skb = NULL;
- } else {
- *pskb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
- if (!(*pskb))
- return -ENOMEM;
- }
+ unsigned int next_frag;
- skb_reserve(*pskb, ETH_HLEN);
- if (data_len <= QETH_RX_PULL_LEN) {
- skb_put_data(*pskb, element->addr + offset, data_len);
- } else {
- get_page(page);
- skb_put_data(*pskb, element->addr + offset,
- QETH_RX_PULL_LEN);
- skb_fill_page_desc(*pskb, *pfrag, page,
- offset + QETH_RX_PULL_LEN,
- data_len - QETH_RX_PULL_LEN);
- (*pskb)->data_len += data_len - QETH_RX_PULL_LEN;
- (*pskb)->len += data_len - QETH_RX_PULL_LEN;
- (*pskb)->truesize += data_len - QETH_RX_PULL_LEN;
- (*pfrag)++;
- }
- } else {
- get_page(page);
- skb_fill_page_desc(*pskb, *pfrag, page, offset, data_len);
- (*pskb)->data_len += data_len;
- (*pskb)->len += data_len;
- (*pskb)->truesize += data_len;
- (*pfrag)++;
- }
+ /* first fill the linear space */
+ if (!skb->len) {
+ unsigned int linear = min(data_len, skb_tailroom(skb));
+ skb_put_data(skb, element->addr + offset, linear);
+ data_len -= linear;
+ if (!data_len)
+ return;
+ offset += linear;
+ /* fall through to add page frag for remaining data */
+ }
- return 0;
+ next_frag = skb_shinfo(skb)->nr_frags;
+ get_page(page);
+ skb_add_rx_frag(skb, next_frag, page, offset, data_len, data_len);
}
static inline int qeth_is_last_sbale(struct qdio_buffer_element *sbale)
struct qdio_buffer_element *element = *__element;
struct qdio_buffer *buffer = qethbuffer->buffer;
int offset = *__offset;
- struct sk_buff *skb = NULL;
+ struct sk_buff *skb;
int skb_len = 0;
void *data_ptr;
int data_len;
int headroom = 0;
int use_rx_sg = 0;
- int frag = 0;
/* qeth_hdr must not cross element boundaries */
- if (element->length < offset + sizeof(struct qeth_hdr)) {
+ while (element->length < offset + sizeof(struct qeth_hdr)) {
if (qeth_is_last_sbale(element))
return NULL;
element++;
offset = 0;
- if (element->length < sizeof(struct qeth_hdr))
- return NULL;
}
*hdr = element->addr + offset;
if (((skb_len >= card->options.rx_sg_cb) &&
(!(card->info.type == QETH_CARD_TYPE_OSN)) &&
(!atomic_read(&card->force_alloc_skb))) ||
- (card->options.cq == QETH_CQ_ENABLED)) {
+ (card->options.cq == QETH_CQ_ENABLED))
use_rx_sg = 1;
+
+ if (use_rx_sg && qethbuffer->rx_skb) {
+ /* QETH_CQ_ENABLED only: */
+ skb = qethbuffer->rx_skb;
+ qethbuffer->rx_skb = NULL;
} else {
- skb = dev_alloc_skb(skb_len + headroom);
- if (!skb)
- goto no_mem;
- if (headroom)
- skb_reserve(skb, headroom);
+ unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
+
+ skb = dev_alloc_skb(linear + headroom);
}
+ if (!skb)
+ goto no_mem;
+ if (headroom)
+ skb_reserve(skb, headroom);
data_ptr = element->addr + offset;
while (skb_len) {
data_len = min(skb_len, (int)(element->length - offset));
if (data_len) {
- if (use_rx_sg) {
- if (qeth_create_skb_frag(qethbuffer, element,
- &skb, offset, &frag, data_len))
- goto no_mem;
- } else {
+ if (use_rx_sg)
+ qeth_create_skb_frag(element, skb, offset,
+ data_len);
+ else
skb_put_data(skb, data_ptr, data_len);
- }
}
skb_len -= data_len;
if (skb_len) {
}
}
- napi_complete(napi);
+ napi_complete_done(napi, work_done);
if (qdio_start_irq(card->data.ccwdev, 0))
napi_schedule(&card->napi);
out:
gdev->cdev[1]->handler = qeth_irq;
gdev->cdev[2]->handler = qeth_irq;
- rc = qeth_determine_card_type(card);
- if (rc) {
- QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc);
- goto err_card;
- }
+ qeth_determine_card_type(card);
rc = qeth_setup_card(card);
if (rc) {
QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc);
.owner = THIS_MODULE,
.name = "qeth",
},
+ .ccw_driver = &qeth_ccw_driver,
.setup = qeth_core_probe_device,
.remove = qeth_core_remove_device,
.set_online = qeth_core_set_online,
return rc;
}
- /* try to restore device features on a device after recovery */
- int qeth_recover_features(struct net_device *dev)
+ #define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO)
+
+ /**
+ * qeth_recover_features() - Restore device features after recovery
+ * @dev: the recovering net_device
+ *
+ * Caller must hold rtnl lock.
+ */
+ void qeth_recover_features(struct net_device *dev)
{
+ netdev_features_t features = dev->features;
struct qeth_card *card = dev->ml_priv;
- netdev_features_t recover = dev->features;
- if (recover & NETIF_F_IP_CSUM) {
- if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM))
- recover ^= NETIF_F_IP_CSUM;
- }
- if (recover & NETIF_F_RXCSUM) {
- if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM))
- recover ^= NETIF_F_RXCSUM;
- }
- if (recover & NETIF_F_TSO) {
- if (qeth_set_ipa_tso(card, 1))
- recover ^= NETIF_F_TSO;
- }
-
- if (recover == dev->features)
- return 0;
+ /* force-off any feature that needs an IPA sequence.
+ * netdev_update_features() will restart them.
+ */
+ dev->features &= ~QETH_HW_FEATURES;
+ netdev_update_features(dev);
+ if (features == dev->features)
+ return;
dev_warn(&card->gdev->dev,
"Device recovery failed to restore all offload features\n");
- dev->features = recover;
- return -EIO;
}
EXPORT_SYMBOL_GPL(qeth_recover_features);
/* if the card isn't up, remove features that require hw changes */
if (card->state == CARD_STATE_DOWN ||
card->state == CARD_STATE_RECOVER)
- features = features & ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
- NETIF_F_TSO);
+ features &= ~QETH_HW_FEATURES;
QETH_DBF_HEX(SETUP, 2, &features, sizeof(features));
return features;
}
/* Returns how many objects can be queued, < 0 indicates over limit. */
static inline int dql_avail(const struct dql *dql)
{
- return ACCESS_ONCE(dql->adj_limit) - ACCESS_ONCE(dql->num_queued);
+ return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued);
}
/* Record number of completed objects and recalculate the limit. */
void dql_reset(struct dql *dql);
/* Initialize dql state */
- int dql_init(struct dql *dql, unsigned hold_time);
+ void dql_init(struct dql *dql, unsigned int hold_time);
#endif /* _KERNEL_ */
int length;
void *value;
struct property *next;
+#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC)
unsigned long _flags;
+#endif
+#if defined(CONFIG_OF_PROMTREE)
unsigned int unique_id;
+#endif
+#if defined(CONFIG_OF_KOBJ)
struct bin_attribute attr;
+#endif
};
#if defined(CONFIG_SPARC)
struct device_node *parent;
struct device_node *child;
struct device_node *sibling;
+#if defined(CONFIG_OF_KOBJ)
struct kobject kobj;
+#endif
unsigned long _flags;
void *data;
#if defined(CONFIG_SPARC)
extern const struct fwnode_operations of_fwnode_ops;
static inline void of_node_init(struct device_node *node)
{
+#if defined(CONFIG_OF_KOBJ)
kobject_init(&node->kobj, &of_node_ktype);
+#endif
node->fwnode.ops = &of_fwnode_ops;
}
-/* true when node is initialized */
-static inline int of_node_is_initialized(struct device_node *node)
-{
- return node && node->kobj.state_initialized;
-}
-
-/* true when node is attached (i.e. present on sysfs) */
-static inline int of_node_is_attached(struct device_node *node)
-{
- return node && node->kobj.state_in_sysfs;
-}
+#if defined(CONFIG_OF_KOBJ)
+#define of_node_kobj(n) (&(n)->kobj)
+#else
+#define of_node_kobj(n) NULL
+#endif
#ifdef CONFIG_OF_DYNAMIC
extern struct device_node *of_node_get(struct device_node *node);
clear_bit(flag, &n->_flags);
}
+#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC)
static inline int of_property_check_flag(struct property *p, unsigned long flag)
{
return test_bit(flag, &p->_flags);
{
clear_bit(flag, &p->_flags);
}
+#endif
extern struct device_node *__of_find_all_nodes(struct device_node *prev);
extern struct device_node *of_find_all_nodes(struct device_node *prev);
return -ENOSYS;
}
- static inline int of_property_read_u32_index(const struct device_node *np,
- const char *propname, u32 index, u32 *out_value)
- {
- return -ENOSYS;
- }
-
static inline int of_property_read_u8_array(const struct device_node *np,
const char *propname, u8 *out_values, size_t sz)
{
return -ENOSYS;
}
- static inline int of_property_read_string(const struct device_node *np,
- const char *propname,
- const char **out_string)
+ static inline int of_property_read_u32_index(const struct device_node *np,
+ const char *propname, u32 index, u32 *out_value)
{
return -ENOSYS;
}
- static inline int of_property_read_string_helper(const struct device_node *np,
- const char *propname,
- const char **out_strs, size_t sz, int index)
+ static inline int of_property_read_u64_index(const struct device_node *np,
+ const char *propname, u32 index, u64 *out_value)
{
return -ENOSYS;
}
return 0;
}
+ static inline int of_property_read_variable_u8_array(const struct device_node *np,
+ const char *propname, u8 *out_values,
+ size_t sz_min, size_t sz_max)
+ {
+ return -ENOSYS;
+ }
+
+ static inline int of_property_read_variable_u16_array(const struct device_node *np,
+ const char *propname, u16 *out_values,
+ size_t sz_min, size_t sz_max)
+ {
+ return -ENOSYS;
+ }
+
+ static inline int of_property_read_variable_u32_array(const struct device_node *np,
+ const char *propname,
+ u32 *out_values,
+ size_t sz_min,
+ size_t sz_max)
+ {
+ return -ENOSYS;
+ }
+
static inline int of_property_read_u64(const struct device_node *np,
const char *propname, u64 *out_value)
{
return -ENOSYS;
}
+ static inline int of_property_read_variable_u64_array(const struct device_node *np,
+ const char *propname,
+ u64 *out_values,
+ size_t sz_min,
+ size_t sz_max)
+ {
+ return -ENOSYS;
+ }
+
+ static inline int of_property_read_string(const struct device_node *np,
+ const char *propname,
+ const char **out_string)
+ {
+ return -ENOSYS;
+ }
+
static inline int of_property_match_string(const struct device_node *np,
const char *propname,
const char *string)
return -ENOSYS;
}
+ static inline int of_property_read_string_helper(const struct device_node *np,
+ const char *propname,
+ const char **out_strs, size_t sz, int index)
+ {
+ return -ENOSYS;
+ }
+
static inline struct device_node *of_parse_phandle(const struct device_node *np,
const char *phandle_name,
int index)
}
#endif /* CONFIG_OF_DYNAMIC */
-/* CONFIG_OF_RESOLVE api */
-extern int of_resolve_phandles(struct device_node *tree);
-
/**
* of_device_is_system_power_controller - Tells if system-power-controller is found for device_node
* @np: Pointer to the given device_node
*/
enum of_overlay_notify_action {
- OF_OVERLAY_PRE_APPLY,
+ OF_OVERLAY_PRE_APPLY = 0,
OF_OVERLAY_POST_APPLY,
OF_OVERLAY_PRE_REMOVE,
OF_OVERLAY_POST_REMOVE,
#ifdef CONFIG_OF_OVERLAY
/* ID based overlays; the API for external users */
-int of_overlay_create(struct device_node *tree);
-int of_overlay_destroy(int id);
-int of_overlay_destroy_all(void);
+int of_overlay_apply(struct device_node *tree, int *ovcs_id);
+int of_overlay_remove(int *ovcs_id);
+int of_overlay_remove_all(void);
int of_overlay_notifier_register(struct notifier_block *nb);
int of_overlay_notifier_unregister(struct notifier_block *nb);
#else
-static inline int of_overlay_create(struct device_node *tree)
+static inline int of_overlay_apply(struct device_node *tree, int *ovcs_id)
{
return -ENOTSUPP;
}
-static inline int of_overlay_destroy(int id)
+static inline int of_overlay_remove(int *ovcs_id)
{
return -ENOTSUPP;
}
-static inline int of_overlay_destroy_all(void)
+static inline int of_overlay_remove_all(void)
{
return -ENOTSUPP;
}
u32 id, long expires, u32 error);
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
+ void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags, int *new_nsid);
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned change, u32 event,
- gfp_t flags);
+ gfp_t flags, int *new_nsid);
void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev,
gfp_t flags);
* @p: The pointer to read, prior to dereferencing
*
* Return the value of the specified RCU-protected pointer, but omit
- * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
+ * both the smp_read_barrier_depends() and the READ_ONCE(), because
* caller holds RTNL.
*/
#define rtnl_dereference(p) \
* @flush: deactivate element in the next generation
* @remove: remove element from set
* @walk: iterate over all set elemeennts
+ * @get: get set elements
* @privsize: function to return size of set private data
* @init: initialize private data of new set instance
* @destroy: destroy private data of set instance
void (*walk)(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter);
+ void * (*get)(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ unsigned int flags);
unsigned int (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
static inline u8 nft_genmask_cur(const struct net *net)
{
- /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */
- return 1 << ACCESS_ONCE(net->nft.gencursor);
+ /* Use READ_ONCE() to prevent refetching the value for atomicity */
+ return 1 << READ_ONCE(net->nft.gencursor);
}
#define NFT_GENMASK_ANY ((1 << 0) | (1 << 1))
struct perf_event_context *task_ctx = cpuctx->task_ctx;
int ret = 0;
- WARN_ON_ONCE(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
perf_ctx_lock(cpuctx, task_ctx);
/*
struct task_struct *task = READ_ONCE(ctx->task);
struct perf_event_context *task_ctx = NULL;
- WARN_ON_ONCE(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
if (task) {
if (task == TASK_TOMBSTONE)
return event->clock();
}
+/*
+ * State based event timekeeping...
+ *
+ * The basic idea is to use event->state to determine which (if any) time
+ * fields to increment with the current delta. This means we only need to
+ * update timestamps when we change state or when they are explicitly requested
+ * (read).
+ *
+ * Event groups make things a little more complicated, but not terribly so. The
+ * rules for a group are that if the group leader is OFF the entire group is
+ * OFF, irrespecive of what the group member states are. This results in
+ * __perf_effective_state().
+ *
+ * A futher ramification is that when a group leader flips between OFF and
+ * !OFF, we need to update all group member times.
+ *
+ *
+ * NOTE: perf_event_time() is based on the (cgroup) context time, and thus we
+ * need to make sure the relevant context time is updated before we try and
+ * update our timestamps.
+ */
+
+static __always_inline enum perf_event_state
+__perf_effective_state(struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+
+ if (leader->state <= PERF_EVENT_STATE_OFF)
+ return leader->state;
+
+ return event->state;
+}
+
+static __always_inline void
+__perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
+{
+ enum perf_event_state state = __perf_effective_state(event);
+ u64 delta = now - event->tstamp;
+
+ *enabled = event->total_time_enabled;
+ if (state >= PERF_EVENT_STATE_INACTIVE)
+ *enabled += delta;
+
+ *running = event->total_time_running;
+ if (state >= PERF_EVENT_STATE_ACTIVE)
+ *running += delta;
+}
+
+static void perf_event_update_time(struct perf_event *event)
+{
+ u64 now = perf_event_time(event);
+
+ __perf_update_times(event, now, &event->total_time_enabled,
+ &event->total_time_running);
+ event->tstamp = now;
+}
+
+static void perf_event_update_sibling_time(struct perf_event *leader)
+{
+ struct perf_event *sibling;
+
+ list_for_each_entry(sibling, &leader->sibling_list, group_entry)
+ perf_event_update_time(sibling);
+}
+
+static void
+perf_event_set_state(struct perf_event *event, enum perf_event_state state)
+{
+ if (event->state == state)
+ return;
+
+ perf_event_update_time(event);
+ /*
+ * If a group leader gets enabled/disabled all its siblings
+ * are affected too.
+ */
+ if ((event->state < 0) ^ (state < 0))
+ perf_event_update_sibling_time(event);
+
+ WRITE_ONCE(event->state, state);
+}
+
#ifdef CONFIG_CGROUP_PERF
static inline bool
event->shadow_ctx_time = now - t->timestamp;
}
-static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
- /*
- * when the current task's perf cgroup does not match
- * the event's, we need to remember to call the
- * perf_mark_enable() function the first time a task with
- * a matching perf cgroup is scheduled in.
- */
- if (is_cgroup_event(event) && !perf_cgroup_match(event))
- event->cgrp_defer_enabled = 1;
-}
-
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx)
-{
- struct perf_event *sub;
- u64 tstamp = perf_event_time(event);
-
- if (!event->cgrp_defer_enabled)
- return;
-
- event->cgrp_defer_enabled = 0;
-
- event->tstamp_enabled = tstamp - event->total_time_enabled;
- list_for_each_entry(sub, &event->sibling_list, group_entry) {
- if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
- sub->tstamp_enabled = tstamp - sub->total_time_enabled;
- sub->cgrp_defer_enabled = 0;
- }
- }
-}
-
/*
* Update cpuctx->cgrp so that it is set when first cgroup event is added and
* cleared when last cgroup event is removed.
return 0;
}
-static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
-}
-
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx)
-{
-}
-
static inline void
list_update_cgroup_event(struct perf_event *event,
struct perf_event_context *ctx, bool add)
struct perf_cpu_context *cpuctx;
int rotations = 0;
- WARN_ON(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
rotations = perf_rotate_context(cpuctx);
{
struct list_head *head = this_cpu_ptr(&active_ctx_list);
- WARN_ON(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
WARN_ON(!list_empty(&ctx->active_ctx_list));
static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
{
- WARN_ON(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
WARN_ON(list_empty(&ctx->active_ctx_list));
again:
rcu_read_lock();
- ctx = ACCESS_ONCE(event->ctx);
+ ctx = READ_ONCE(event->ctx);
if (!atomic_inc_not_zero(&ctx->refcount)) {
rcu_read_unlock();
goto again;
return ctx ? ctx->time : 0;
}
-/*
- * Update the total_time_enabled and total_time_running fields for a event.
- */
-static void update_event_times(struct perf_event *event)
-{
- struct perf_event_context *ctx = event->ctx;
- u64 run_end;
-
- lockdep_assert_held(&ctx->lock);
-
- if (event->state < PERF_EVENT_STATE_INACTIVE ||
- event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
- return;
-
- /*
- * in cgroup mode, time_enabled represents
- * the time the event was enabled AND active
- * tasks were in the monitored cgroup. This is
- * independent of the activity of the context as
- * there may be a mix of cgroup and non-cgroup events.
- *
- * That is why we treat cgroup events differently
- * here.
- */
- if (is_cgroup_event(event))
- run_end = perf_cgroup_event_time(event);
- else if (ctx->is_active)
- run_end = ctx->time;
- else
- run_end = event->tstamp_stopped;
-
- event->total_time_enabled = run_end - event->tstamp_enabled;
-
- if (event->state == PERF_EVENT_STATE_INACTIVE)
- run_end = event->tstamp_stopped;
- else
- run_end = perf_event_time(event);
-
- event->total_time_running = run_end - event->tstamp_running;
-
-}
-
-/*
- * Update total_time_enabled and total_time_running for all events in a group.
- */
-static void update_group_times(struct perf_event *leader)
-{
- struct perf_event *event;
-
- update_event_times(leader);
- list_for_each_entry(event, &leader->sibling_list, group_entry)
- update_event_times(event);
-}
-
static enum event_type_t get_event_type(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
event->attach_state |= PERF_ATTACH_CONTEXT;
+ event->tstamp = perf_event_time(event);
+
/*
* If we're a stand alone event or group leader, we go to the context
* list, group events are kept attached to the group so that
if (event->group_leader == event)
list_del_init(&event->group_entry);
- update_group_times(event);
-
/*
* If event was in error state, then keep it
* that way, otherwise bogus counts will be
* of the event
*/
if (event->state > PERF_EVENT_STATE_OFF)
- event->state = PERF_EVENT_STATE_OFF;
+ perf_event_set_state(event, PERF_EVENT_STATE_OFF);
ctx->generation++;
}
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
- u64 tstamp = perf_event_time(event);
- u64 delta;
+ enum perf_event_state state = PERF_EVENT_STATE_INACTIVE;
WARN_ON_ONCE(event->ctx != ctx);
lockdep_assert_held(&ctx->lock);
- /*
- * An event which could not be activated because of
- * filter mismatch still needs to have its timings
- * maintained, otherwise bogus information is return
- * via read() for time_enabled, time_running:
- */
- if (event->state == PERF_EVENT_STATE_INACTIVE &&
- !event_filter_match(event)) {
- delta = tstamp - event->tstamp_stopped;
- event->tstamp_running += delta;
- event->tstamp_stopped = tstamp;
- }
-
if (event->state != PERF_EVENT_STATE_ACTIVE)
return;
perf_pmu_disable(event->pmu);
- event->tstamp_stopped = tstamp;
event->pmu->del(event, 0);
event->oncpu = -1;
- event->state = PERF_EVENT_STATE_INACTIVE;
+
if (event->pending_disable) {
event->pending_disable = 0;
- event->state = PERF_EVENT_STATE_OFF;
+ state = PERF_EVENT_STATE_OFF;
}
+ perf_event_set_state(event, state);
if (!is_software_event(event))
cpuctx->active_oncpu--;
struct perf_event_context *ctx)
{
struct perf_event *event;
- int state = group_event->state;
+
+ if (group_event->state != PERF_EVENT_STATE_ACTIVE)
+ return;
perf_pmu_disable(ctx->pmu);
perf_pmu_enable(ctx->pmu);
- if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
+ if (group_event->attr.exclusive)
cpuctx->exclusive = 0;
}
{
unsigned long flags = (unsigned long)info;
+ if (ctx->is_active & EVENT_TIME) {
+ update_context_time(ctx);
+ update_cgrp_time_from_cpuctx(cpuctx);
+ }
+
event_sched_out(event, cpuctx, ctx);
if (flags & DETACH_GROUP)
perf_group_detach(event);
if (event->state < PERF_EVENT_STATE_INACTIVE)
return;
- update_context_time(ctx);
- update_cgrp_time_from_event(event);
- update_group_times(event);
+ if (ctx->is_active & EVENT_TIME) {
+ update_context_time(ctx);
+ update_cgrp_time_from_event(event);
+ }
+
if (event == event->group_leader)
group_sched_out(event, cpuctx, ctx);
else
event_sched_out(event, cpuctx, ctx);
- event->state = PERF_EVENT_STATE_OFF;
+
+ perf_event_set_state(event, PERF_EVENT_STATE_OFF);
}
/*
}
static void perf_set_shadow_time(struct perf_event *event,
- struct perf_event_context *ctx,
- u64 tstamp)
+ struct perf_event_context *ctx)
{
/*
* use the correct time source for the time snapshot
* is cleaner and simpler to understand.
*/
if (is_cgroup_event(event))
- perf_cgroup_set_shadow_time(event, tstamp);
+ perf_cgroup_set_shadow_time(event, event->tstamp);
else
- event->shadow_ctx_time = tstamp - ctx->timestamp;
+ event->shadow_ctx_time = event->tstamp - ctx->timestamp;
}
#define MAX_INTERRUPTS (~0ULL)
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
- u64 tstamp = perf_event_time(event);
int ret = 0;
lockdep_assert_held(&ctx->lock);
WRITE_ONCE(event->oncpu, smp_processor_id());
/*
- * Order event::oncpu write to happen before the ACTIVE state
- * is visible.
+ * Order event::oncpu write to happen before the ACTIVE state is
+ * visible. This allows perf_event_{stop,read}() to observe the correct
+ * ->oncpu if it sees ACTIVE.
*/
smp_wmb();
- WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
+ perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE);
/*
* Unthrottle events, since we scheduled we might have missed several
event->hw.interrupts = 0;
}
- /*
- * The new state must be visible before we turn it on in the hardware:
- */
- smp_wmb();
-
perf_pmu_disable(event->pmu);
- perf_set_shadow_time(event, ctx, tstamp);
+ perf_set_shadow_time(event, ctx);
perf_log_itrace_start(event);
if (event->pmu->add(event, PERF_EF_START)) {
- event->state = PERF_EVENT_STATE_INACTIVE;
+ perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
event->oncpu = -1;
ret = -EAGAIN;
goto out;
}
- event->tstamp_running += tstamp - event->tstamp_stopped;
-
if (!is_software_event(event))
cpuctx->active_oncpu++;
if (!ctx->nr_active++)
{
struct perf_event *event, *partial_group = NULL;
struct pmu *pmu = ctx->pmu;
- u64 now = ctx->time;
- bool simulate = false;
if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;
/*
* Groups can be scheduled in as one unit only, so undo any
* partial group before returning:
- * The events up to the failed event are scheduled out normally,
- * tstamp_stopped will be updated.
- *
- * The failed events and the remaining siblings need to have
- * their timings updated as if they had gone thru event_sched_in()
- * and event_sched_out(). This is required to get consistent timings
- * across the group. This also takes care of the case where the group
- * could never be scheduled by ensuring tstamp_stopped is set to mark
- * the time the event was actually stopped, such that time delta
- * calculation in update_event_times() is correct.
+ * The events up to the failed event are scheduled out normally.
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
if (event == partial_group)
- simulate = true;
+ break;
- if (simulate) {
- event->tstamp_running += now - event->tstamp_stopped;
- event->tstamp_stopped = now;
- } else {
- event_sched_out(event, cpuctx, ctx);
- }
+ event_sched_out(event, cpuctx, ctx);
}
event_sched_out(group_event, cpuctx, ctx);
return can_add_hw;
}
-/*
- * Complement to update_event_times(). This computes the tstamp_* values to
- * continue 'enabled' state from @now, and effectively discards the time
- * between the prior tstamp_stopped and now (as we were in the OFF state, or
- * just switched (context) time base).
- *
- * This further assumes '@event->state == INACTIVE' (we just came from OFF) and
- * cannot have been scheduled in yet. And going into INACTIVE state means
- * '@event->tstamp_stopped = @now'.
- *
- * Thus given the rules of update_event_times():
- *
- * total_time_enabled = tstamp_stopped - tstamp_enabled
- * total_time_running = tstamp_stopped - tstamp_running
- *
- * We can insert 'tstamp_stopped == now' and reverse them to compute new
- * tstamp_* values.
- */
-static void __perf_event_enable_time(struct perf_event *event, u64 now)
-{
- WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE);
-
- event->tstamp_stopped = now;
- event->tstamp_enabled = now - event->total_time_enabled;
- event->tstamp_running = now - event->total_time_running;
-}
-
static void add_event_to_ctx(struct perf_event *event,
struct perf_event_context *ctx)
{
- u64 tstamp = perf_event_time(event);
-
list_add_event(event, ctx);
perf_group_attach(event);
- /*
- * We can be called with event->state == STATE_OFF when we create with
- * .disabled = 1. In that case the IOC_ENABLE will call this function.
- */
- if (event->state == PERF_EVENT_STATE_INACTIVE)
- __perf_event_enable_time(event, tstamp);
}
static void ctx_sched_out(struct perf_event_context *ctx,
raw_spin_unlock_irq(&ctx->lock);
}
-/*
- * Put a event into inactive state and update time fields.
- * Enabling the leader of a group effectively enables all
- * the group members that aren't explicitly disabled, so we
- * have to update their ->tstamp_enabled also.
- * Note: this works for group members as well as group leaders
- * since the non-leader members' sibling_lists will be empty.
- */
-static void __perf_event_mark_enabled(struct perf_event *event)
-{
- struct perf_event *sub;
- u64 tstamp = perf_event_time(event);
-
- event->state = PERF_EVENT_STATE_INACTIVE;
- __perf_event_enable_time(event, tstamp);
- list_for_each_entry(sub, &event->sibling_list, group_entry) {
- /* XXX should not be > INACTIVE if event isn't */
- if (sub->state >= PERF_EVENT_STATE_INACTIVE)
- __perf_event_enable_time(sub, tstamp);
- }
-}
-
/*
* Cross CPU call to enable a performance event
*/
if (ctx->is_active)
ctx_sched_out(ctx, cpuctx, EVENT_TIME);
- __perf_event_mark_enabled(event);
+ perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
if (!ctx->is_active)
return;
if (!event_filter_match(event)) {
- if (is_cgroup_event(event))
- perf_cgroup_defer_enabled(event);
ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
return;
}
* we know the event must be on the current CPU, therefore we
* don't need to use it.
*/
- switch (event->state) {
- case PERF_EVENT_STATE_ACTIVE:
+ if (event->state == PERF_EVENT_STATE_ACTIVE)
event->pmu->read(event);
- /* fall-through */
- case PERF_EVENT_STATE_INACTIVE:
- update_event_times(event);
- break;
-
- default:
- break;
- }
+ perf_event_update_time(event);
/*
* In order to keep per-task stats reliable we need to flip the event
if (!event_filter_match(event))
continue;
- /* may need to reset tstamp_enabled */
- if (is_cgroup_event(event))
- perf_cgroup_mark_enabled(event, ctx);
-
if (group_can_go_on(event, cpuctx, 1))
group_sched_in(event, cpuctx, ctx);
* If this pinned group hasn't been scheduled,
* put it in error state.
*/
- if (event->state == PERF_EVENT_STATE_INACTIVE) {
- update_group_times(event);
- event->state = PERF_EVENT_STATE_ERROR;
- }
+ if (event->state == PERF_EVENT_STATE_INACTIVE)
+ perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
}
}
if (!event_filter_match(event))
continue;
- /* may need to reset tstamp_enabled */
- if (is_cgroup_event(event))
- perf_cgroup_mark_enabled(event, ctx);
-
if (group_can_go_on(event, cpuctx, can_add_hw)) {
if (group_sched_in(event, cpuctx, ctx))
can_add_hw = 0;
struct perf_event_context *ctx, *tmp;
int throttled;
- WARN_ON(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
__this_cpu_inc(perf_throttled_seq);
throttled = __this_cpu_xchg(perf_throttled_count, 0);
if (event->state >= PERF_EVENT_STATE_INACTIVE)
return 0;
- __perf_event_mark_enabled(event);
+ perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
return 1;
}
return;
raw_spin_lock(&ctx->lock);
- if (ctx->is_active) {
+ if (ctx->is_active & EVENT_TIME) {
update_context_time(ctx);
update_cgrp_time_from_event(event);
}
- update_event_times(event);
+ perf_event_update_time(event);
+ if (data->group)
+ perf_event_update_sibling_time(event);
+
if (event->state != PERF_EVENT_STATE_ACTIVE)
goto unlock;
pmu->read(event);
list_for_each_entry(sub, &event->sibling_list, group_entry) {
- update_event_times(sub);
if (sub->state == PERF_EVENT_STATE_ACTIVE) {
/*
* Use sibling's PMU rather than @event's since
{
unsigned long flags;
int ret = 0;
- u64 now;
/*
* Disabling interrupts avoids all counter scheduling (context
goto out;
}
-
- now = event->shadow_ctx_time + perf_clock();
- if (enabled)
- *enabled = now - event->tstamp_enabled;
/*
* If the event is currently on this CPU, its either a per-task event,
* or local to this CPU. Furthermore it means its ACTIVE (otherwise
* oncpu == -1).
*/
- if (event->oncpu == smp_processor_id()) {
+ if (event->oncpu == smp_processor_id())
event->pmu->read(event);
- if (running)
- *running = now - event->tstamp_running;
- } else if (running) {
- *running = event->total_time_running;
- }
*value = local64_read(&event->count);
+ if (enabled || running) {
+ u64 now = event->shadow_ctx_time + perf_clock();
+ u64 __enabled, __running;
+
+ __perf_update_times(event, now, &__enabled, &__running);
+ if (enabled)
+ *enabled = __enabled;
+ if (running)
+ *running = __running;
+ }
out:
local_irq_restore(flags);
static int perf_event_read(struct perf_event *event, bool group)
{
+ enum perf_event_state state = READ_ONCE(event->state);
int event_cpu, ret = 0;
/*
* If event is enabled and currently active on a CPU, update the
* value in the event structure:
*/
- if (event->state == PERF_EVENT_STATE_ACTIVE) {
- struct perf_read_data data = {
- .event = event,
- .group = group,
- .ret = 0,
- };
+again:
+ if (state == PERF_EVENT_STATE_ACTIVE) {
+ struct perf_read_data data;
+
+ /*
+ * Orders the ->state and ->oncpu loads such that if we see
+ * ACTIVE we must also see the right ->oncpu.
+ *
+ * Matches the smp_wmb() from event_sched_in().
+ */
+ smp_rmb();
event_cpu = READ_ONCE(event->oncpu);
if ((unsigned)event_cpu >= nr_cpu_ids)
return 0;
+ data = (struct perf_read_data){
+ .event = event,
+ .group = group,
+ .ret = 0,
+ };
+
preempt_disable();
event_cpu = __perf_event_read_cpu(event, event_cpu);
(void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
preempt_enable();
ret = data.ret;
- } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+
+ } else if (state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
raw_spin_lock_irqsave(&ctx->lock, flags);
+ state = event->state;
+ if (state != PERF_EVENT_STATE_INACTIVE) {
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
+ goto again;
+ }
+
/*
- * may read while context is not active
- * (e.g., thread is blocked), in that case
- * we cannot update context time
+ * May read while context is not active (e.g., thread is
+ * blocked), in that case we cannot update context time
*/
- if (ctx->is_active) {
+ if (ctx->is_active & EVENT_TIME) {
update_context_time(ctx);
update_cgrp_time_from_event(event);
}
+
+ perf_event_update_time(event);
if (group)
- update_group_times(event);
- else
- update_event_times(event);
+ perf_event_update_sibling_time(event);
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
* indeed free this event, otherwise we need to serialize on
* owner->perf_event_mutex.
*/
- owner = lockless_dereference(event->owner);
+ owner = READ_ONCE(event->owner);
if (owner) {
/*
* Since delayed_put_task_struct() also drops the last
* Cannot change, child events are not migrated, see the
* comment with perf_event_ctx_lock_nested().
*/
- ctx = lockless_dereference(child->ctx);
+ ctx = READ_ONCE(child->ctx);
/*
* Since child_mutex nests inside ctx::mutex, we must jump
* through hoops. We start by grabbing a reference on the ctx.
return 0;
}
-u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
+static u64 __perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
{
struct perf_event *child;
u64 total = 0;
return total;
}
+
+u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
+{
+ struct perf_event_context *ctx;
+ u64 count;
+
+ ctx = perf_event_ctx_lock(event);
+ count = __perf_event_read_value(event, enabled, running);
+ perf_event_ctx_unlock(event, ctx);
+
+ return count;
+}
EXPORT_SYMBOL_GPL(perf_event_read_value);
static int __perf_read_group_add(struct perf_event *leader,
if (ret)
return ret;
+ raw_spin_lock_irqsave(&ctx->lock, flags);
+
/*
* Since we co-schedule groups, {enabled,running} times of siblings
* will be identical to those of the leader, so we only publish one
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
- raw_spin_lock_irqsave(&ctx->lock, flags);
-
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
values[n++] += perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
u64 values[4];
int n = 0;
- values[n++] = perf_event_read_value(event, &enabled, &running);
+ values[n++] = __perf_event_read_value(event, &enabled, &running);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
values[n++] = enabled;
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
*now = perf_clock();
ctx_time = event->shadow_ctx_time + *now;
- *enabled = ctx_time - event->tstamp_enabled;
- *running = ctx_time - event->tstamp_running;
+ __perf_update_times(event, ctx_time, enabled, running);
}
static void perf_event_init_userpage(struct perf_event *event)
if (!rb)
goto aux_unlock;
- aux_offset = ACCESS_ONCE(rb->user_page->aux_offset);
- aux_size = ACCESS_ONCE(rb->user_page->aux_size);
+ aux_offset = READ_ONCE(rb->user_page->aux_offset);
+ aux_size = READ_ONCE(rb->user_page->aux_size);
if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
goto aux_unlock;
struct pt_regs *regs, struct hlist_head *head,
struct task_struct *task)
{
- struct bpf_prog *prog = call->prog;
-
- if (prog) {
+ if (bpf_prog_array_valid(call)) {
*(struct pt_regs **)raw_data = regs;
- if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
+ if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
perf_swevent_put_recursion_context(rctx);
return;
}
{
bool is_kprobe, is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
+ int ret;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return perf_event_set_bpf_handler(event, prog_fd);
- if (event->tp_event->prog)
- return -EEXIST;
-
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
is_syscall_tp = is_syscall_trace_event(event->tp_event);
return -EACCES;
}
}
- event->tp_event->prog = prog;
- event->tp_event->bpf_prog_owner = event;
- return 0;
+ ret = perf_event_attach_bpf_prog(event, prog);
+ if (ret)
+ bpf_prog_put(prog);
+ return ret;
}
static void perf_event_free_bpf_prog(struct perf_event *event)
{
- struct bpf_prog *prog;
-
- perf_event_free_bpf_handler(event);
-
- if (!event->tp_event)
+ if (event->attr.type != PERF_TYPE_TRACEPOINT) {
+ perf_event_free_bpf_handler(event);
return;
-
- prog = event->tp_event->prog;
- if (prog && event->tp_event->bpf_prog_owner == event) {
- event->tp_event->prog = NULL;
- bpf_prog_put(prog);
}
+ perf_event_detach_bpf_prog(event);
}
#else
inc = true;
if (inc) {
+ /*
+ * We need the mutex here because static_branch_enable()
+ * must complete *before* the perf_sched_count increment
+ * becomes visible.
+ */
if (atomic_inc_not_zero(&perf_sched_count))
goto enabled;
if (parent_event)
perf_group_detach(child_event);
list_del_event(child_event, child_ctx);
- child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
+ perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
raw_spin_unlock_irq(&child_ctx->lock);
/*
struct perf_event *group_leader,
struct perf_event_context *child_ctx)
{
- enum perf_event_active_state parent_state = parent_event->state;
+ enum perf_event_state parent_state = parent_event->state;
struct perf_event *child_event;
unsigned long flags;
struct perf_event *event;
raw_spin_lock(&ctx->lock);
+ ctx_sched_out(ctx, cpuctx, EVENT_TIME);
list_for_each_entry(event, &ctx->event_list, event_entry)
__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
raw_spin_unlock(&ctx->lock);
unsigned int ovlimit, completed, num_queued;
bool all_prev_completed;
- num_queued = ACCESS_ONCE(dql->num_queued);
+ num_queued = READ_ONCE(dql->num_queued);
/* Can't complete more than what's in queue */
BUG_ON(count > num_queued - dql->num_completed);
}
EXPORT_SYMBOL(dql_reset);
- int dql_init(struct dql *dql, unsigned hold_time)
+ void dql_init(struct dql *dql, unsigned int hold_time)
{
dql->max_limit = DQL_MAX_LIMIT;
dql->min_limit = 0;
dql->slack_hold_time = hold_time;
dql_reset(dql);
- return 0;
}
EXPORT_SYMBOL(dql_init);
static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
unsigned long event, void *dev);
static void mpc_timer_refresh(void);
- static void mpc_cache_check(unsigned long checking_time);
+ static void mpc_cache_check(struct timer_list *unused);
static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
0xaa, 0xaa, 0x03,
struct mpoa_client *mpcs = NULL; /* FIXME */
static struct atm_mpoa_qos *qos_head = NULL;
-static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
+static DEFINE_TIMER(mpc_timer, NULL);
static struct mpoa_client *find_mpc_by_itfnum(int itf)
int err;
if (mpcs == NULL) {
- init_timer(&mpc_timer);
mpc_timer_refresh();
/* This lets us now how our LECs are doing */
msg_to_mpoad(msg, mpc);
}
+ static unsigned long checking_time;
+
static void mpc_timer_refresh(void)
{
mpc_timer.expires = jiffies + (MPC_P2 * HZ);
- mpc_timer.data = mpc_timer.expires;
- mpc_timer.function = mpc_cache_check;
+ checking_time = mpc_timer.expires;
+ mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check;
add_timer(&mpc_timer);
}
- static void mpc_cache_check(unsigned long checking_time)
+ static void mpc_cache_check(struct timer_list *unused)
{
struct mpoa_client *mpc = mpcs;
static unsigned long previous_resolving_check_time;
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
+ #include <linux/net_namespace.h>
#include "net-sysfs.h"
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info);
static struct napi_struct *napi_by_id(unsigned int napi_id);
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+ static DEFINE_MUTEX(ifalias_mutex);
+
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
unsigned long *inuse;
struct net_device *d;
- p = strnchr(name, IFNAMSIZ-1, '%');
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ p = strchr(name, '%');
if (p) {
/*
* Verify the string as this thing may have come from
free_page((unsigned long) inuse);
}
- if (buf != name)
- snprintf(buf, IFNAMSIZ, name, i);
+ snprintf(buf, IFNAMSIZ, name, i);
if (!__dev_get_by_name(net, buf))
return i;
* when the name is long and there isn't enough space left
* for the digits, or if all bits are used.
*/
- return -ENFILE;
+ return p ? -ENFILE : -EEXIST;
+ }
+
+ static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
+ {
+ char buf[IFNAMSIZ];
+ int ret;
+
+ BUG_ON(!net);
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
int dev_alloc_name(struct net_device *dev, const char *name)
{
- char buf[IFNAMSIZ];
- struct net *net;
- int ret;
-
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
+ return dev_alloc_name_ns(dev_net(dev), dev, name);
}
EXPORT_SYMBOL(dev_alloc_name);
- static int dev_alloc_name_ns(struct net *net,
- struct net_device *dev,
- const char *name)
- {
- char buf[IFNAMSIZ];
- int ret;
-
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
- }
-
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (__dev_get_by_name(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strlcpy(dev->name, name, IFNAMSIZ);
-
- return 0;
+ return dev_alloc_name_ns(net, dev, name);
}
EXPORT_SYMBOL(dev_get_valid_name);
*/
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
- char *new_ifalias;
-
- ASSERT_RTNL();
+ struct dev_ifalias *new_alias = NULL;
if (len >= IFALIASZ)
return -EINVAL;
- if (!len) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- return 0;
+ if (len) {
+ new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+ if (!new_alias)
+ return -ENOMEM;
+
+ memcpy(new_alias->ifalias, alias, len);
+ new_alias->ifalias[len] = 0;
}
- new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
- if (!new_ifalias)
- return -ENOMEM;
- dev->ifalias = new_ifalias;
- memcpy(dev->ifalias, alias, len);
- dev->ifalias[len] = 0;
+ mutex_lock(&ifalias_mutex);
+ rcu_swap_protected(dev->ifalias, new_alias,
+ mutex_is_locked(&ifalias_mutex));
+ mutex_unlock(&ifalias_mutex);
+
+ if (new_alias)
+ kfree_rcu(new_alias, rcuhead);
return len;
}
+ /**
+ * dev_get_alias - get ifalias of a device
+ * @dev: device
+ * @name: buffer to store name of ifalias
+ * @len: size of buffer
+ *
+ * get ifalias for a device. Caller must make sure dev cannot go
+ * away, e.g. rcu read lock or own a reference count to device.
+ */
+ int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+ {
+ const struct dev_ifalias *alias;
+ int ret = 0;
+
+ rcu_read_lock();
+ alias = rcu_dereference(dev->ifalias);
+ if (alias)
+ ret = snprintf(name, len, "%s", alias->ifalias);
+ rcu_read_unlock();
+
+ return ret;
+ }
/**
* netdev_features_change - device changes features
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info.dev = dev,
+ };
- change_info.flags_changed = 0;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGE,
&change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- netdev_notifier_info_init(&info, dev);
return nb->notifier_call(nb, val, &info);
}
*/
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info)
{
ASSERT_RTNL();
- netdev_notifier_info_init(info, dev);
return raw_notifier_call_chain(&netdev_chain, val, info);
}
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- return call_netdevice_notifiers_info(val, dev, &info);
+ return call_netdevice_notifiers_info(val, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
return 0;
}
+ EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
- if (!cl)
+ if (!miniq)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
kfree_skb(skb);
return NULL;
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
- cpu = ACCESS_ONCE(rflow->cpu);
+ cpu = READ_ONCE(rflow->cpu);
if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
((int)(per_cpu(softnet_data, cpu).input_queue_head -
rflow->last_qtail) <
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
- u32 act = XDP_DROP;
void *orig_data;
int hlen, off;
u32 mac_len;
if (skb_cloned(skb))
return XDP_PASS;
- if (skb_linearize(skb))
- goto do_drop;
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (troom > 0 && __skb_linearize(skb))
+ goto do_drop;
+ }
/* The XDP program wants to see the packet starting at the MAC
* header.
mac_len = skb->data - skb_mac_header(skb);
hlen = skb_headlen(skb) + mac_len;
xdp.data = skb->data - mac_len;
+ xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + hlen;
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
case XDP_REDIRECT:
case XDP_TX:
__skb_push(skb, mac_len);
- /* fall through */
+ break;
case XDP_PASS:
+ metalen = xdp.data - xdp.data_meta;
+ if (metalen)
+ skb_metadata_set(skb, metalen);
break;
-
default:
bpf_warn_invalid_xdp_action(act);
/* fall through */
struct net_device *orig_dev)
{
#ifdef CONFIG_NET_CLS_ACT
- struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
struct tcf_result cl_res;
/* If there's at least one ingress present somewhere (so
* that are not configured with an ingress qdisc will bail
* out here.
*/
- if (!cl)
+ if (!miniq)
return skb;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_at_ingress = 1;
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
kfree_skb(skb);
return NULL;
case TC_ACT_STOLEN:
return ret;
}
+ /**
+ * netif_receive_skb_core - special purpose version of netif_receive_skb
+ * @skb: buffer to process
+ *
+ * More direct receive version of netif_receive_skb(). It should
+ * only be used by callers that have a need to skip RPS and Generic XDP.
+ * Caller must also take care of handling if (page_is_)pfmemalloc.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+ int netif_receive_skb_core(struct sk_buff *skb)
+ {
+ int ret;
+
+ rcu_read_lock();
+ ret = __netif_receive_skb_core(skb, false);
+ rcu_read_unlock();
+
+ return ret;
+ }
+ EXPORT_SYMBOL(netif_receive_skb_core);
+
static int __netif_receive_skb(struct sk_buff *skb)
{
int ret;
return ret;
}
- static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
{
struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
struct bpf_prog *new = xdp->prog;
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *upper_priv, void *upper_info)
- {
- struct netdev_notifier_changeupper_info changeupper_info;
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
+ {
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ },
+ .upper_dev = upper_dev,
+ .master = master,
+ .linking = true,
+ .upper_info = upper_info,
+ };
int ret = 0;
ASSERT_RTNL();
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- changeupper_info.upper_dev = upper_dev;
- changeupper_info.master = master;
- changeupper_info.linking = true;
- changeupper_info.upper_info = upper_info;
-
- ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
if (ret)
return ret;
- ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
* returns zero.
*/
int netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false,
+ NULL, NULL, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
*/
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
- void *upper_priv, void *upper_info)
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
{
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info);
+ upper_priv, upper_info, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_notifier_changeupper_info changeupper_info;
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ },
+ .upper_dev = upper_dev,
+ .linking = false,
+ };
ASSERT_RTNL();
- changeupper_info.upper_dev = upper_dev;
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
- changeupper_info.linking = false;
- call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
void netdev_bonding_info_change(struct net_device *dev,
struct netdev_bonding_info *bonding_info)
{
- struct netdev_notifier_bonding_info info;
+ struct netdev_notifier_bonding_info info = {
+ .info.dev = dev,
+ };
memcpy(&info.bonding_info, bonding_info,
sizeof(struct netdev_bonding_info));
- call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
&info.info);
}
EXPORT_SYMBOL(netdev_bonding_info_change);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info)
{
- struct netdev_notifier_changelowerstate_info changelowerstate_info;
+ struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+ .info.dev = lower_dev,
+ };
ASSERT_RTNL();
changelowerstate_info.lower_state_info = lower_state_info;
- call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
&changelowerstate_info.info);
}
EXPORT_SYMBOL(netdev_lower_state_changed);
if (dev->flags & IFF_UP &&
(changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info = {
+ .dev = dev,
+ },
+ .flags_changed = changes,
+ };
- change_info.flags_changed = changes;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
- &change_info.info);
+ call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
}
}
}
EXPORT_SYMBOL(dev_change_proto_down);
- u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+ u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
xdp.command = XDP_QUERY_PROG;
/* Query must always succeed. */
- WARN_ON(xdp_op(dev, &xdp) < 0);
+ WARN_ON(bpf_op(dev, &xdp) < 0);
if (prog_id)
*prog_id = xdp.prog_id;
return xdp.prog_attached;
}
- static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
xdp.flags = flags;
xdp.prog = prog;
- return xdp_op(dev, &xdp);
+ return bpf_op(dev, &xdp);
}
/**
{
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- xdp_op_t xdp_op, xdp_chk;
+ bpf_op_t bpf_op, bpf_chk;
int err;
ASSERT_RTNL();
- xdp_op = xdp_chk = ops->ndo_xdp;
- if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+ bpf_op = bpf_chk = ops->ndo_bpf;
+ if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
return -EOPNOTSUPP;
- if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
- xdp_op = generic_xdp_install;
- if (xdp_op == xdp_chk)
- xdp_chk = generic_xdp_install;
+ if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
+ bpf_op = generic_xdp_install;
+ if (bpf_op == bpf_chk)
+ bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+ if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
return -EEXIST;
if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_attached(dev, xdp_op, NULL))
+ __dev_xdp_attached(dev, bpf_op, NULL))
return -EBUSY;
- prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (bpf_op == ops->ndo_bpf)
+ prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ dev);
+ else
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
+ err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
/*
* Flush the unicast and multicast chains
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
- size_t alloc_size;
+ unsigned int alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- int err;
+ int err, new_nsid;
ASSERT_RTNL();
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+ new_nsid = peernet2id_alloc(dev_net(dev), net);
+ else
+ new_nsid = peernet2id(dev_net(dev), net);
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
/*
* Flush the unicast and multicast chains
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ if (net != &init_net)
+ WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
static struct pernet_operations __net_initdata netdev_net_ops = {
+ pkt_dev->pkt_overhead;
}
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ for (i = 0; i < sizeof(struct in6_addr); i++)
if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
set = 1;
break;
static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
int datalen)
{
- struct timeval timestamp;
+ struct timespec64 timestamp;
struct pktgen_hdr *pgh;
pgh = skb_put(skb, sizeof(*pgh));
pgh->tv_sec = 0;
pgh->tv_usec = 0;
} else {
- do_gettimeofday(×tamp);
+ /*
+ * pgh->tv_sec wraps in y2106 when interpreted as unsigned
+ * as done by wireshark, or y2038 when interpreted as signed.
+ * This is probably harmless, but if anyone wants to improve
+ * it, we could introduce a variant that puts 64-bit nanoseconds
+ * into the respective header bytes.
+ * This would also be slightly faster to read.
+ */
+ ktime_get_real_ts64(×tamp);
pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
+ pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
}
}
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
- unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
+ unsigned int burst = READ_ONCE(pkt_dev->burst);
struct net_device *odev = pkt_dev->odev;
struct netdev_queue *txq;
struct sk_buff *skb;
static unsigned int dn_rt_hash_mask;
static struct timer_list dn_route_timer;
-static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0);
+static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
int decnet_dst_gc_interval = 2;
static struct dst_ops dn_dst_ops = {
dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
- dst_use(&rth->dst, now);
+ dst_hold_and_use(&rth->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
dst_release_immediate(&rt->dst);
rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
- dst_use(&rt->dst, now);
+ dst_hold_and_use(&rt->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
*rp = rt;
return 0;
(flp->flowidn_mark == rt->fld.flowidn_mark) &&
dn_is_output_route(rt) &&
(rt->fld.flowidn_oif == flp->flowidn_oif)) {
- dst_use(&rt->dst, jiffies);
+ dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock_bh();
*pprt = &rt->dst;
return 0;
(rt->fld.flowidn_oif == 0) &&
(rt->fld.flowidn_mark == skb->mark) &&
(rt->fld.flowidn_iif == cb->iif)) {
- dst_use(&rt->dst, jiffies);
+ dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock();
skb_dst_set(skb, (struct dst_entry *)rt);
return 0;
spin_unlock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire((unsigned long) fq);
+ f->frag_expire(&fq->timer);
return evicted;
}
local_bh_disable();
- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+ for (i = READ_ONCE(f->next_bucket); budget; --budget) {
evicted += inet_evict_bucket(f, &f->hash[i]);
i = (i + 1) & (INETFRAGS_HASHSZ - 1);
if (evicted > INETFRAGS_EVICT_MAX)
f->constructor(q, arg);
add_frag_mem_limit(nf, f->qsize);
- setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+ timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
refcount_set(&q->refcnt, 1);
{
u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
- u32 old = ACCESS_ONCE(*p_tstamp);
+ u32 old = READ_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
u32 new, delta = 0;
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
- unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+ unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
ip_rt_min_advmss);
return min(advmss, IPV4_MAX_PMTU - header_size);
int __init ip_rt_init(void)
{
- int rc = 0;
int cpu;
ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
#endif
register_pernet_subsys(&rt_genid_ops);
register_pernet_subsys(&ipv4_inetpeer_ops);
- return rc;
+ return 0;
}
#ifdef CONFIG_SYSCTL
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
+ #include <trace/events/tcp.h>
+ #include <linux/static_key.h>
- int sysctl_tcp_fack __read_mostly;
- int sysctl_tcp_max_reordering __read_mostly = 300;
- int sysctl_tcp_dsack __read_mostly = 1;
- int sysctl_tcp_app_win __read_mostly = 31;
- int sysctl_tcp_adv_win_scale __read_mostly = 1;
- EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-
- /* rfc5961 challenge ack rate limiting */
- int sysctl_tcp_challenge_ack_limit = 1000;
-
- int sysctl_tcp_stdurg __read_mostly;
- int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
- int sysctl_tcp_frto __read_mostly = 2;
- int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
- int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
- int sysctl_tcp_early_retrans __read_mostly = 3;
- int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
- sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+ sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(skb->truesize) >> 1;
- int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+ int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+ int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
incr = 2 * tp->advmss;
else
incr = __tcp_grow_window(sk, skb);
/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
* Allow enough cushion so that sender is not limited by our window
*/
- if (sysctl_tcp_moderate_rcvbuf)
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
rcvmem <<= 2;
if (sk->sk_rcvbuf < rcvmem)
- sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+ sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
}
/* 4. Try to fixup all. It is made immediately after connection enters
*/
void tcp_init_buffer_space(struct sock *sk)
{
+ int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin;
- if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+ if (tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin -
- (maxwin >> sysctl_tcp_app_win),
+ (maxwin >> tcp_app_win),
4 * tp->advmss);
}
/* Force reservation of one segment. */
- if (sysctl_tcp_app_win &&
+ if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
icsk->icsk_ack.quick = 0;
- if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- sysctl_tcp_rmem[2]);
+ net->ipv4.sysctl_tcp_rmem[2]);
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sysctl_tcp_moderate_rcvbuf &&
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvwin, rcvmem, rcvbuf;
}
rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(rcvmem) < tp->advmss)
+ while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128;
- rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+ rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf;
tp->srtt_us = max(1U, srtt);
}
- /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
- * Note: TCP stack does not yet implement pacing.
- * FQ packet scheduler can be used to implement cheap but effective
- * TCP pacing, to smooth the burst on large writes when packets
- * in flight is significantly lower than cwnd (or rwin)
- */
- int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
- int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
-
static void tcp_update_pacing_rate(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
* end of slow start and should slow down.
*/
if (tp->snd_cwnd < tp->snd_ssthresh / 2)
- rate *= sysctl_tcp_pacing_ss_ratio;
+ rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
else
- rate *= sysctl_tcp_pacing_ca_ratio;
+ rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
rate *= max(tp->snd_cwnd, tp->packets_out);
if (likely(tp->srtt_us))
do_div(rate, tp->srtt_us);
- /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+ /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
* without any lock. We want to make sure compiler wont store
* intermediate values in this location.
*/
- ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
- sk->sk_max_pacing_rate);
+ WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
+ sk->sk_max_pacing_rate));
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
- /*
- * Packet counting of FACK is based on in-order assumptions, therefore TCP
- * disables it when reordering is detected
- */
- void tcp_disable_fack(struct tcp_sock *tp)
- {
- /* RFC3517 uses different metric in lost marker => reset on change */
- if (tcp_is_fack(tp))
- tp->lost_skb_hint = NULL;
- tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
- }
-
/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+ tp->rack.dsack_seen = 1;
}
- static void tcp_update_reordering(struct sock *sk, const int metric,
- const int ts)
+ /* It's reordering when higher sequence was delivered (i.e. sacked) before
+ * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+ * distance is approximated in full-mss packet distance ("reordering").
+ */
+ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+ const int ts)
{
struct tcp_sock *tp = tcp_sk(sk);
- int mib_idx;
+ const u32 mss = tp->mss_cache;
+ u32 fack, metric;
- if (WARN_ON_ONCE(metric < 0))
+ fack = tcp_highest_sack_seq(tp);
+ if (!before(low_seq, fack))
return;
- if (metric > tp->reordering) {
- tp->reordering = min(sysctl_tcp_max_reordering, metric);
-
+ metric = fack - low_seq;
+ if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
tp->reordering,
- tp->fackets_out,
+ 0,
tp->sacked_out,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
- tcp_disable_fack(tp);
+ tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
}
tp->rack.reord = 1;
-
/* This exciting event is worth to be remembered. 8) */
- if (ts)
- mib_idx = LINUX_MIB_TCPTSREORDER;
- else if (tcp_is_reno(tp))
- mib_idx = LINUX_MIB_TCPRENOREORDER;
- else if (tcp_is_fack(tp))
- mib_idx = LINUX_MIB_TCPFACKREORDER;
- else
- mib_idx = LINUX_MIB_TCPSACKREORDER;
-
- NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk),
+ ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}
/* This must be called before lost_out is incremented */
* 3. Loss detection event of two flavors:
* A. Scoreboard estimator decided the packet is lost.
* A'. Reno "three dupacks" marks head of queue lost.
- * A''. Its FACK modification, head until snd.fack is lost.
* B. SACK arrives sacking SND.NXT at the moment, when the
* segment was retransmitted.
* 4. D-SACK added new rule: D-SACK changes any tag to S.
}
struct tcp_sacktag_state {
- int reord;
- int fack_count;
+ u32 reord;
/* Timestamps for earliest and latest never-retransmitted segment
* that was SACKed. RTO needs the earliest RTT to stay conservative,
* but congestion control should still get an accurate delay signal.
u64 last_sackt;
struct rate_sample *rate;
int flag;
+ unsigned int mss_now;
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
if (pkt_len >= skb->len && !in_sack)
return 0;
- err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+ err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
}
u64 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
- int fack_count = state->fack_count;
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
- if (sacked & TCPCB_SACKED_ACKED)
- state->reord = min(fack_count, state->reord);
+ if ((sacked & TCPCB_SACKED_ACKED) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
}
/* Nothing to do; acked frame is about to be dropped (was ACKed). */
* which was in hole. It is reordering.
*/
if (before(start_seq,
- tcp_highest_sack_seq(tp)))
- state->reord = min(fack_count,
- state->reord);
+ tcp_highest_sack_seq(tp)) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
+
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
if (state->first_sackt == 0)
tp->sacked_out += pcount;
tp->delivered += pcount; /* Out-of-order packets delivered */
- fack_count += pcount;
-
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
- if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+ if (tp->lost_skb_hint &&
before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
tp->lost_cnt_hint += pcount;
-
- if (fack_count > tp->fackets_out)
- tp->fackets_out = fack_count;
}
/* D-SACK. We can detect redundant retransmission in S|R and plain R
/* Shift newly-SACKed bytes from this skb to the immediately previous
* already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
*/
- static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+ struct sk_buff *skb,
struct tcp_sacktag_state *state,
unsigned int pcount, int shifted, int mss,
bool dup_sack)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
goto fallback;
/* Can only happen with delayed DSACK + discard craziness */
- if (unlikely(skb == tcp_write_queue_head(sk)))
+ prev = skb_rb_prev(skb);
+ if (!prev)
goto fallback;
- prev = tcp_write_queue_prev(sk, skb);
if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
goto fallback;
if (!skb_shift(prev, skb, len))
goto fallback;
- if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+ if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
goto out;
/* Hole filled allows collapsing with the next as well, this is very
* useful when hole on every nth skb pattern happens
*/
- if (prev == tcp_write_queue_tail(sk))
+ skb = skb_rb_next(prev);
+ if (!skb)
goto out;
- skb = tcp_write_queue_next(sk, prev);
if (!skb_can_shift(skb) ||
- (skb == tcp_send_head(sk)) ||
((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
(mss != tcp_skb_seglen(skb)))
goto out;
len = skb->len;
if (skb_shift(prev, skb, len)) {
pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+ tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+ len, mss, 0);
}
out:
- state->fack_count += pcount;
return prev;
noop:
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *tmp;
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
int in_sack = 0;
bool dup_sack = dup_sack_in;
- if (skb == tcp_send_head(sk))
- break;
-
/* queue is in-order => we can short-circuit the walk early */
if (!before(TCP_SKB_CB(skb)->seq, end_seq))
break;
tcp_skb_pcount(skb),
skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ list_del_init(&skb->tcp_tsorted_anchor);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
tcp_advance_highest_sack(sk, skb);
}
-
- state->fack_count += tcp_skb_pcount(skb);
}
return skb;
}
- /* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
+ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+ struct tcp_sacktag_state *state,
+ u32 seq)
+ {
+ struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+ struct sk_buff *skb;
+
+ while (*p) {
+ parent = *p;
+ skb = rb_to_skb(parent);
+ if (before(seq, TCP_SKB_CB(skb)->seq)) {
+ p = &parent->rb_left;
+ continue;
+ }
+ if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+ p = &parent->rb_right;
+ continue;
+ }
+ return skb;
+ }
+ return NULL;
+ }
+
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
struct tcp_sacktag_state *state,
u32 skip_to_seq)
{
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
- if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
- break;
+ if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+ return skb;
- state->fack_count += tcp_skb_pcount(skb);
- }
- return skb;
+ return tcp_sacktag_bsearch(sk, state, skip_to_seq);
}
static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
int first_sack_index;
state->flag = 0;
- state->reord = tp->packets_out;
+ state->reord = tp->snd_nxt;
- if (!tp->sacked_out) {
- if (WARN_ON(tp->fackets_out))
- tp->fackets_out = 0;
+ if (!tp->sacked_out)
tcp_highest_sack_reset(sk);
- }
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
}
}
- skb = tcp_write_queue_head(sk);
- state->fack_count = 0;
+ state->mss_now = tcp_current_mss(sk);
+ skb = NULL;
i = 0;
if (!tp->sacked_out) {
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
cache++;
goto walk;
}
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
}
skb = tcp_sacktag_skip(skb, sk, state, start_seq);
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- if ((state->reord < tp->fackets_out) &&
- ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
- tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
+ tcp_check_sack_reordering(sk, state->reord, 0);
tcp_verify_left_out(tp);
out:
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_limit_reno_sacked(tp))
- tcp_update_reordering(sk, tp->packets_out + addend, 0);
+
+ if (!tcp_limit_reno_sacked(tp))
+ return;
+
+ tp->reordering = min_t(u32, tp->packets_out + addend,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
/* Emulate SACKs for SACKless connection: account for a new dupack. */
tp->lost_out = 0;
tp->undo_marker = 0;
tp->undo_retrans = -1;
- tp->fackets_out = 0;
tp->sacked_out = 0;
}
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
- tp->fackets_out = 0;
}
tcp_clear_all_retrans_hints(tp);
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
+ skb_rbtree_walk_from(skb) {
mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
is_reneg);
if (mark_lost)
* falsely raise the receive window, which results in repeated
* timeouts and stop-and-go behavior.
*/
- tp->frto = sysctl_tcp_frto &&
+ tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
return false;
}
- static inline int tcp_fackets_out(const struct tcp_sock *tp)
- {
- return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
- }
-
/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
* counter when SACK is enabled (without SACK, sacked_out is used for
* that purpose).
*
- * Instead, with FACK TCP uses fackets_out that includes both SACKed
- * segments up to the highest received SACK block so far and holes in
- * between them.
- *
* With reordering, holes may still be in flight, so RFC3517 recovery
* uses pure sacked_out (total number of SACKed segments) even though
* it violates the RFC that uses duplicate ACKs, often these are equal
*/
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
- return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+ return tp->sacked_out + 1;
}
- /* Linux NewReno/SACK/FACK/ECN state machine.
+ /* Linux NewReno/SACK/ECN state machine.
* --------------------------------------
*
* "Open" Normal state, no dubious events, fast path.
* dynamically measured and adjusted. This is implemented in
* tcp_rack_mark_lost.
*
- * FACK (Disabled by default. Subsumbed by RACK):
- * It is the simplest heuristics. As soon as we decided
- * that something is lost, we decide that _all_ not SACKed
- * packets until the most forward SACK are lost. I.e.
- * lost_out = fackets_out - sacked_out and left_out = fackets_out.
- * It is absolutely correct estimate, if network does not reorder
- * packets. And it loses any connection to reality when reordering
- * takes place. We use FACK by default until reordering
- * is suspected on the path to this destination.
- *
* If the receiver does not support SACK:
*
* NewReno (RFC6582): in Recovery we assume that one segment
}
/* Detect loss in event "A" above by marking head of queue up as lost.
- * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * For non-SACK(Reno) senders, the first "packets" number of segments
* are considered lost. For RFC3517 SACK, a segment is considered lost if it
* has at least tp->reordering SACKed seqments above it; "packets" refers to
* the maximum SACKed segments to pass before reaching this limit.
const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
WARN_ON(packets > tp->packets_out);
- if (tp->lost_skb_hint) {
- skb = tp->lost_skb_hint;
- cnt = tp->lost_cnt_hint;
+ skb = tp->lost_skb_hint;
+ if (skb) {
/* Head already handled? */
- if (mark_head && skb != tcp_write_queue_head(sk))
+ if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
return;
+ cnt = tp->lost_cnt_hint;
} else {
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
cnt = 0;
}
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk_from(skb) {
/* TODO: do this better */
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
break;
oldcnt = cnt;
- if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+ if (tcp_is_reno(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
cnt += tcp_skb_pcount(skb);
if (cnt > packets) {
- if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+ if (tcp_is_sack(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
(oldcnt >= packets))
break;
/* If needed, chop off the prefix to mark as lost. */
lost = (packets - oldcnt) * mss;
if (lost < skb->len &&
- tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+ tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ lost, mss, GFP_ATOMIC) < 0)
break;
cnt = packets;
}
if (tcp_is_reno(tp)) {
tcp_mark_head_lost(sk, 1, 1);
- } else if (tcp_is_fack(tp)) {
- int lost = tp->fackets_out - tp->reordering;
- if (lost <= 0)
- lost = 1;
- tcp_mark_head_lost(sk, lost, 0);
} else {
int sacked_upto = tp->sacked_out - tp->reordering;
if (sacked_upto >= 0)
if (tp->retrans_out)
return true;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
return true;
return false;
}
- #if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
+ #if FASTRETRANS_DEBUG > 1
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
tp->packets_out);
}
#endif
- }
- #else
- #define DBGUNDO(x...) do { } while (0)
#endif
+ }
static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
if (unmark_loss) {
struct sk_buff *skb;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
tp->lost_out = 0;
mib_idx = LINUX_MIB_TCPFULLUNDO;
NET_INC_STATS(sock_net(sk), mib_idx);
+ } else if (tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_persist--;
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
/* Hold old state until something *above* high_seq
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && !tp->undo_retrans) {
+ tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+ tp->rack.reo_wnd_persist + 1);
DBGUNDO(sk, "D-SACK");
tcp_undo_cwnd_reduction(sk, false);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk);
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
if (tcp_skb_seglen(skb) > mss &&
!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
* is updated in tcp_ack()). Otherwise fall back to
* the conventional recovery.
*/
- if (tcp_send_head(sk) &&
+ if (!tcp_write_queue_empty(sk) &&
after(tcp_wnd_end(tp), tp->snd_nxt)) {
*rexmit = REXMIT_NEW;
return;
}
/* Undo during fast recovery after partial ACK. */
- static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && tcp_packet_delayed(tp)) {
/* Plain luck! Hole if filled with delayed
- * packet, rather than with a retransmit.
+ * packet, rather than with a retransmit. Check reordering.
*/
- tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+ tcp_check_sack_reordering(sk, prior_snd_una, 1);
/* We are getting evidence that the reordering degree is higher
* than we realized. If there are no retransmits out then we
struct tcp_sock *tp = tcp_sk(sk);
/* Use RACK to detect loss */
- if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk);
}
}
+ static bool tcp_force_fast_retransmit(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ return after(tcp_highest_sack_seq(tp),
+ tp->snd_una + tp->reordering * tp->mss_cache);
+ }
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
- static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
- (tcp_fackets_out(tp) > tp->reordering));
+ tcp_force_fast_retransmit(sk));
- if (WARN_ON(!tp->packets_out && tp->sacked_out))
+ if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
- if (WARN_ON(!tp->sacked_out && tp->fackets_out))
- tp->fackets_out = 0;
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked))
+ if (tcp_try_undo_partial(sk, prior_snd_una))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
- tcp_fackets_out(tp) > tp->reordering;
+ tcp_force_fast_retransmit(sk);
}
if (tcp_try_undo_dsack(sk)) {
tcp_try_keep_open(sk);
(*ack_flag & FLAG_LOST_RETRANS)))
return;
/* Change state if cwnd is undone or retransmits are lost */
+ /* fall through */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
+ u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
struct tcp_sock *tp = tcp_sk(sk);
- u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
shinfo = skb_shinfo(skb);
if (!before(shinfo->tskey, prior_snd_una) &&
- before(shinfo->tskey, tcp_sk(sk)->snd_una))
- __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
+ tcp_skb_tsorted_save(skb) {
+ __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ } tcp_skb_tsorted_restore(skb);
+ }
}
/* Remove acknowledged frames from the retransmission queue. If our packet
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
- static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, int *acked,
+ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+ u32 prior_snd_una,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u64 first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
- u32 reord = tp->packets_out;
+ u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
+ struct sk_buff *skb, *next;
bool fully_acked = true;
long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
- struct sk_buff *skb;
u32 pkts_acked = 0;
u32 last_in_flight = 0;
bool rtt_update;
first_ackt = 0;
- while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+ for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ const u32 start_seq = scb->seq;
u8 sacked = scb->sacked;
u32 acked_pcount;
break;
fully_acked = false;
} else {
- /* Speedup tcp_unlink_write_queue() and next loop */
- prefetchw(skb->next);
acked_pcount = tcp_skb_pcount(skb);
}
first_ackt = last_ackt;
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
- reord = min(pkts_acked, reord);
+ if (before(start_seq, reord))
+ reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
flag |= FLAG_ORIG_SACK_ACKED;
}
if (!fully_acked)
break;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))
tp->lost_skb_hint = NULL;
+ tcp_rtx_queue_unlink_and_free(skb, sk);
}
if (!skb)
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets && reord <= tp->fackets_out)
- tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+ if (before(reord, prior_fack))
+ tcp_check_sack_reordering(sk, reord, 0);
- delta = tcp_is_fack(tp) ? pkts_acked :
- prior_sacked - tp->sacked_out;
+ delta = prior_sacked - tp->sacked_out;
tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
}
-
- tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
}
}
#endif
- *acked = pkts_acked;
return flag;
}
static void tcp_ack_probe(struct sock *sk)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *head = tcp_send_head(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* Was it a usable window open? */
-
- if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+ if (!head)
+ return;
+ if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
icsk->icsk_backoff = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* Socket must be waked up by subsequent tcp_data_snd_check().
tp->pred_flags = 0;
tcp_fast_path_check(sk);
- if (tcp_send_head(sk))
+ if (!tcp_write_queue_empty(sk))
tcp_slow_start_after_idle_check(sk);
if (nwin > tp->max_window) {
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
static u32 challenge_timestamp;
static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
u32 count, now;
/* First check our per-socket dupack rate limit. */
- if (__tcp_oow_rate_limited(sock_net(sk),
+ if (__tcp_oow_rate_limited(net,
LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
&tp->last_oow_ack_time))
return;
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
- u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+ u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ u32 half = (ack_limit + 1) >> 1;
challenge_timestamp = now;
- WRITE_ONCE(challenge_count, half +
- prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+ WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
}
count = READ_ONCE(challenge_count);
if (count > 0) {
WRITE_ONCE(challenge_count, count - 1);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
}
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_fackets;
int prior_packets = tp->packets_out;
u32 delivered = tp->delivered;
u32 lost = tp->lost;
- int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ u32 prior_fack;
sack_state.first_sackt = 0;
sack_state.rate = &rs;
- /* We very likely will need to access write queue head. */
- prefetchw(sk->sk_write_queue.next);
+ /* We very likely will need to access rtx queue. */
+ prefetch(sk->tcp_rtx_queue.rb_node);
/* If the ack is older than previous acks
* then we can probably ignore it.
icsk->icsk_retransmits = 0;
}
- prior_fackets = tp->fackets_out;
+ prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
- &sack_state);
+ flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+
+ tcp_rack_update_reo_wnd(sk, &rs);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
- if (tcp_send_head(sk))
- tcp_ack_probe(sk);
+ tcp_ack_probe(sk);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
tcp_xmit_recovery(sk, rexmit);
}
foc->exp = exp_opt;
}
+ static void smc_parse_options(const struct tcphdr *th,
+ struct tcp_options_received *opt_rx,
+ const unsigned char *ptr,
+ int opsize)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (th->syn && !(opsize & 1) &&
+ opsize >= TCPOLEN_EXP_SMC_BASE &&
+ get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+ opt_rx->smc_ok = 1;
+ }
+ #endif
+ }
+
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
+ else
+ smc_parse_options(th, opt_rx, ptr,
+ opsize);
break;
}
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk)
{
+ trace_tcp_receive_reset(sk);
+
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
- if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
tp->rx_opt.num_sacks = num_sacks;
}
- enum tcp_queue {
- OOO_QUEUE,
- RCV_QUEUE,
- };
-
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
* Returns true if caller should free @from instead of queueing it
*/
static bool tcp_try_coalesce(struct sock *sk,
- enum tcp_queue dest,
struct sk_buff *to,
struct sk_buff *from,
bool *fragstolen)
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
- if (dest == OOO_QUEUE)
- TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
- else
- to->tstamp = from->tstamp;
+ to->tstamp = from->tstamp;
}
return true;
p = rb_first(&tp->out_of_order_queue);
while (p) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = rb_to_skb(p);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
}
p = rb_next(p);
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
- /* Replace tstamp which was stomped by rbnode */
- if (TCP_SKB_CB(skb)->has_rxtstamp)
- skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
TCP_SKB_CB(skb)->end_seq);
tail = skb_peek_tail(&sk->sk_receive_queue);
- eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
- tail, skb, &fragstolen);
+ eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node **p, *q, *parent;
+ struct rb_node **p, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
bool fragstolen;
return;
}
- /* Stash tstamp to avoid being stomped on by rbnode */
- if (TCP_SKB_CB(skb)->has_rxtstamp)
- TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
-
/* Disable header prediction. */
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb,
skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(seq, TCP_SKB_CB(skb1)->seq)) {
p = &parent->rb_left;
continue;
__kfree_skb(skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+ } else if (tcp_try_coalesce(sk, skb1,
skb, &fragstolen)) {
goto coalesce_done;
}
merge_right:
/* Remove other segments covered by skb. */
- while ((q = rb_next(&skb->rbnode)) != NULL) {
- skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+ while ((skb1 = skb_rb_next(skb)) != NULL) {
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
tcp_drop(sk, skb1);
}
/* If there is no skb after us, we are the last_skb ! */
- if (!q)
+ if (!skb1)
tp->ooo_last_skb = skb;
add_sack:
__skb_pull(skb, hdrlen);
eaten = (tail &&
- tcp_try_coalesce(sk, RCV_QUEUE, tail,
+ tcp_try_coalesce(sk, tail,
skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
if (list)
return !skb_queue_is_last(list, skb) ? skb->next : NULL;
- return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+ return skb_rb_next(skb);
}
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
}
/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
- static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+ void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
p = &parent->rb_left;
else
* overlaps to the next one.
*/
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
- (tcp_win_from_space(skb->truesize) > skb->len ||
+ (tcp_win_from_space(sk, skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start))) {
end_of_skbs = false;
break;
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb, *head;
- struct rb_node *p;
u32 start, end;
- p = rb_first(&tp->out_of_order_queue);
- skb = rb_entry_safe(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
if (!skb) {
- p = rb_last(&tp->out_of_order_queue);
- /* Note: This is possible p is NULL here. We do not
- * use rb_entry_safe(), as ooo_last_skb is valid only
- * if rbtree is not empty.
- */
- tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+ tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
return;
}
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
for (head = skb;;) {
- skb = tcp_skb_next(skb, NULL);
+ skb = skb_rb_next(skb);
/* Range is terminated when we see a gap or when
* we are at the queue end.
do {
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
- tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+ tcp_drop(sk, rb_to_skb(node));
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!tcp_under_memory_pressure(sk))
break;
node = prev;
} while (node);
- tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+ tp->ooo_last_skb = rb_to_skb(prev);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sysctl_tcp_stdurg)
+ if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
security_inet_conn_established(sk, skb);
}
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
-
- tcp_init_metrics(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
tp->lsndtime = tcp_jiffies32;
- tcp_init_buffer_space(sk);
-
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
struct tcp_fastopen_cookie *cookie)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+ struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
bool syn_drop = false;
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
- tcp_for_write_queue_from(data, sk) {
- if (data == tcp_send_head(sk) ||
- __tcp_retransmit_skb(sk, data, 1))
+ skb_rbtree_walk_from(data) {
+ if (__tcp_retransmit_skb(sk, data, 1))
break;
}
tcp_rearm_rto(sk);
return false;
}
+ static void smc_check_reset_syn(struct tcp_sock *tp)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc && !tp->rx_opt.smc_ok)
+ tp->syn_smc = 0;
+ }
+ #endif
+ }
+
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th)
{
tp->tcp_header_len = sizeof(struct tcphdr);
}
- if (tcp_is_sack(tp) && sysctl_tcp_fack)
- tcp_enable_fack(tp);
-
- tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
+ smc_check_reset_syn(tp);
+
smp_mb();
tcp_finish_connect(sk, skb);
if (req) {
inet_csk(sk)->icsk_retransmits = 0;
reqsk_fastopen_remove(sk, req, false);
+ /* Re-arm the timer because data may have been sent out.
+ * This is similar to the regular data transmission case
+ * when new data has just been ack'ed.
+ *
+ * (TFO) - we could try to be more aggressive and
+ * retransmitting any data sooner based on when they
+ * are sent out.
+ */
+ tcp_rearm_rto(sk);
} else {
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
-
- tcp_mtup_init(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->copied_seq = tp->rcv_nxt;
- tcp_init_buffer_space(sk);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- if (req) {
- /* Re-arm the timer because data may have been sent out.
- * This is similar to the regular data transmission case
- * when new data has just been ack'ed.
- *
- * (TFO) - we could try to be more aggressive and
- * retransmitting any data sooner based on when they
- * are sent out.
- */
- tcp_rearm_rto(sk);
- } else
- tcp_init_metrics(sk);
-
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
+ /* fall through */
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
/* RFC 793 says to queue data in these states,
ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb);
+ #if IS_ENABLED(CONFIG_SMC)
+ ireq->smc_ok = rx_opt->smc_ok;
+ #endif
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
tcp_openreq_init_rwin(req, sk, dst);
if (!want_cookie) {
tcp_reqsk_record_syn(sk, req, skb);
- fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
+ fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
+ #include <linux/static_key.h>
- /* People can turn this off for buggy TCP's found in printers etc. */
- int sysctl_tcp_retrans_collapse __read_mostly = 1;
-
- /* People can turn this on to work with those rare, broken TCPs that
- * interpret the window field as a signed quantity.
- */
- int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
-
- /* Default TSQ limit of four TSO segments */
- int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
-
- /* This limits the percentage of the congestion window which we
- * will allow a single TSO frame to consume. Building TSO frames
- * which are too large can cause TCP streams to be bursty.
- */
- int sysctl_tcp_tso_win_divisor __read_mostly = 3;
-
- /* By default, RFC2861 behavior. */
- int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+ #include <trace/events/tcp.h>
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
/* Account for new data that has been sent to the network. */
- static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
- tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+ __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
tp->packets_out += tcp_skb_pcount(skb);
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
* be a multiple of mss if possible. We assume here that mss >= 1.
* This MUST be enforced by all callers.
*/
- void tcp_select_initial_window(int __space, __u32 mss,
+ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp,
int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd)
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
- if (sysctl_tcp_workaround_signed_windows)
+ if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = space;
(*rcv_wscale) = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
/* Make sure we do not exceed the maximum possible
* scaled window.
*/
- if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
+ if (!tp->rx_opt.rcv_wscale &&
+ sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum = 0;
TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
#define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
+ #define OPTION_SMC (1 << 9)
+
+ static void smc_options_write(__be32 *ptr, u16 *options)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (unlikely(OPTION_SMC & *options)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_EXP << 8) |
+ (TCPOLEN_EXP_SMC_BASE));
+ *ptr++ = htonl(TCPOPT_SMC_MAGIC);
+ }
+ }
+ #endif
+ }
struct tcp_out_options {
u16 options; /* bit field of OPTION_* */
}
ptr += (len + 3) >> 2;
}
+
+ smc_options_write(ptr, &options);
+ }
+
+ static void smc_set_option(const struct tcp_sock *tp,
+ struct tcp_out_options *opts,
+ unsigned int *remaining)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc) {
+ if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+ }
+ }
+ }
+ #endif
+ }
+
+ static void smc_set_option_cond(const struct tcp_sock *tp,
+ const struct inet_request_sock *ireq,
+ struct tcp_out_options *opts,
+ unsigned int *remaining)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc && ireq->smc_ok) {
+ if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+ }
+ }
+ }
+ #endif
}
/* Compute TCP options for SYN packets. This is not the final
}
}
+ smc_set_option(tp, opts, &remaining);
+
return MAX_TCP_OPTION_SPACE - remaining;
}
/* Set up TCP options for SYN-ACKs. */
- static unsigned int tcp_synack_options(struct request_sock *req,
+ static unsigned int tcp_synack_options(const struct sock *sk,
+ struct request_sock *req,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
const struct tcp_md5sig_key *md5,
}
}
+ smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+
return MAX_TCP_OPTION_SPACE - remaining;
}
HRTIMER_MODE_ABS_PINNED);
}
+ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+ {
+ skb->skb_mstamp = tp->tcp_mstamp;
+ list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+ }
+
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
oskb = skb;
- if (unlikely(skb_cloned(skb)))
- skb = pskb_copy(skb, gfp_mask);
- else
- skb = skb_clone(skb, gfp_mask);
+
+ tcp_skb_tsorted_save(oskb) {
+ if (unlikely(skb_cloned(oskb)))
+ skb = pskb_copy(oskb, gfp_mask);
+ else
+ skb = skb_clone(oskb, gfp_mask);
+ } tcp_skb_tsorted_restore(oskb);
+
if (unlikely(!skb))
return -ENOBUFS;
}
err = net_xmit_eval(err);
}
if (!err && oskb) {
- oskb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, oskb);
tcp_rate_skb_sent(sk, oskb);
}
return err;
}
}
- /* When a modification to fackets out becomes necessary, we need to check
- * skb is counted to fackets_out or not.
- */
- static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
- int decr)
- {
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tp->sacked_out || tcp_is_reno(tp))
- return;
-
- if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
- tp->fackets_out -= decr;
- }
-
/* Pcount in the middle of the write queue got changed, we need to do various
* tweaks to fix counters
*/
if (tcp_is_reno(tp) && decr > 0)
tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
- tcp_adjust_fackets_out(sk, skb, decr);
-
if (tp->lost_skb_hint &&
before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
- (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
+ (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
tp->lost_cnt_hint -= decr;
tcp_verify_left_out(tp);
TCP_SKB_CB(skb)->eor = 0;
}
+ /* Insert buff after skb on the write or rtx queue of sk. */
+ static void tcp_insert_write_queue_after(struct sk_buff *skb,
+ struct sk_buff *buff,
+ struct sock *sk,
+ enum tcp_queue tcp_queue)
+ {
+ if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
+ __skb_queue_after(&sk->sk_write_queue, skb, buff);
+ else
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+ }
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
* Remember, these are still headerless SKBs at this point.
*/
- int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, u32 len,
unsigned int mss_now, gfp_t gfp)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+ if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
+ list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
return 0;
}
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sysctl_tcp_slow_start_after_idle &&
+ if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
* is caused by insufficient sender buffer:
* 1) just sent some data (see tcp_write_xmit)
* 2) not cwnd limited (this else condition)
- * 3) no more data to send (null tcp_send_head )
+ * 3) no more data to send (tcp_write_queue_empty())
* 4) application is hitting buffer limit (SOCK_NOSPACE)
*/
- if (!tcp_send_head(sk) && sk->sk_socket &&
+ if (tcp_write_queue_empty(sk) && sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
(1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
{
u32 bytes, segs;
- bytes = min(sk->sk_pacing_rate >> 10,
+ bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
/* Goal is to send at least one packet per ms,
u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
return tso_segs ? :
- tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+ tcp_tso_autosize(sk, mss_now,
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
}
/* Returns the portion of skb which can be sent right away */
* know that all the data is in scatter-gather pages, and that the
* packet has never been sent out before (and thus is not cloned).
*/
- static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, unsigned int len,
unsigned int mss_now, gfp_t gfp)
{
struct sk_buff *buff;
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
- return tcp_fragment(sk, skb, len, mss_now, gfp);
+ return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
return 0;
}
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
goto send_now;
- win_divisor = READ_ONCE(sysctl_tcp_tso_win_divisor);
- win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
++ win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
goto send_now;
}
- head = tcp_write_queue_head(sk);
-
+ /* TODO : use tsorted_sent_queue ? */
+ head = tcp_rtx_queue_head(sk);
+ if (!head)
+ goto send_now;
age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
/* If next ACK is likely to come too late (half srtt), do not defer */
if (age < (tp->srtt_us >> 4))
{
unsigned int limit;
- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
- limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
+ limit = min_t(u32, limit,
+ sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
- /* Always send the 1st or 2nd skb in write queue.
+ /* Always send skb if rtx queue is empty.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
* This helps when TX completions are delayed too much.
*/
- if (skb == sk->sk_write_queue.next ||
- skb->prev == sk->sk_write_queue.next)
+ if (tcp_rtx_queue_empty(sk))
return false;
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
* it's the "most interesting" or current chrono we are
* tracking and starts busy chrono if we have pending data.
*/
- if (tcp_write_queue_empty(sk))
+ if (tcp_rtx_and_write_queues_empty(sk))
tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
else if (type == tp->chrono_type)
tcp_chrono_set(tp, TCP_CHRONO_BUSY);
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp" is used as a start point for the retransmit timer */
- skb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, skb);
goto repair; /* Skip network transmission */
}
nonagle);
if (skb->len > limit &&
- unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+ unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, limit, mss_now, gfp)))
break;
if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
- return !tp->packets_out && tcp_send_head(sk);
+ return !tp->packets_out && !tcp_write_queue_empty(sk);
}
bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, rto_delta_us;
+ int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
if (tp->fastopen_rsk)
return false;
+ early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+ if ((early_retrans != 3 && early_retrans != 4) ||
!tp->packets_out || !tcp_is_sack(tp) ||
icsk->icsk_ca_state != TCP_CA_Open)
return false;
if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
- tcp_send_head(sk))
+ !tcp_write_queue_empty(sk))
return false;
/* Probe timeout is 2*rtt. Add minimum RTO to account
int mss = tcp_current_mss(sk);
skb = tcp_send_head(sk);
- if (skb) {
- if (tcp_snd_wnd_test(tp, skb, mss)) {
- pcount = tp->packets_out;
- tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
- if (tp->packets_out > pcount)
- goto probe_sent;
- goto rearm_timer;
- }
- skb = tcp_write_queue_prev(sk, skb);
- } else {
- skb = tcp_write_queue_tail(sk);
+ if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+ pcount = tp->packets_out;
+ tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+ if (tp->packets_out > pcount)
+ goto probe_sent;
+ goto rearm_timer;
}
+ skb = skb_rb_last(&sk->tcp_rtx_queue);
/* At most one outstanding TLP retransmission. */
if (tp->tlp_high_seq)
goto rearm_timer;
if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
- if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+ if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ (pcount - 1) * mss, mss,
GFP_ATOMIC)))
goto rearm_timer;
- skb = tcp_write_queue_next(sk, skb);
+ skb = skb_rb_next(skb);
}
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+ struct sk_buff *next_skb = skb_rb_next(skb);
int skb_size, next_skb_size;
skb_size = skb->len;
}
tcp_highest_sack_replace(sk, next_skb, skb);
- tcp_unlink_write_queue(next_skb, sk);
-
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
tcp_skb_collapse_tstamp(skb, next_skb);
- sk_wmem_free_skb(sk, next_skb);
+ tcp_rtx_queue_unlink_and_free(next_skb, sk);
return true;
}
return false;
if (skb_cloned(skb))
return false;
- if (skb == tcp_send_head(sk))
- return false;
/* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false;
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sysctl_tcp_retrans_collapse)
+ if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
- tcp_for_write_queue_from_safe(skb, tmp, sk) {
+ skb_rbtree_walk_from_safe(skb, tmp) {
if (!tcp_can_collapse(sk, skb))
break;
len = cur_mss * segs;
if (skb->len > len) {
- if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+ if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+ cur_mss, GFP_ATOMIC))
return -ENOMEM; /* We'll try again later. */
} else {
if (skb_unclone(skb, GFP_ATOMIC))
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb;
- nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ tcp_skb_tsorted_save(skb) {
+ nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
+ } tcp_skb_tsorted_restore(skb);
+
if (!err) {
- skb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, skb);
tcp_rate_skb_sent(sk, skb);
}
} else {
if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+ trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
* retransmitted data is acknowledged. It tries to continue
* resending the rest of the retransmit queue, until either
* we've sent it all or the congestion window limit is reached.
- * If doing SACK, the first ACK which comes back for a timeout
- * based retransmit packet might feed us FACK information again.
- * If so, we use it to avoid unnecessarily retransmissions.
*/
void tcp_xmit_retransmit_queue(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *skb, *rtx_head, *hole = NULL;
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
- struct sk_buff *hole = NULL;
u32 max_segs;
int mib_idx;
if (!tp->packets_out)
return;
- if (tp->retransmit_skb_hint) {
- skb = tp->retransmit_skb_hint;
- } else {
- skb = tcp_write_queue_head(sk);
- }
-
+ rtx_head = tcp_rtx_queue_head(sk);
+ skb = tp->retransmit_skb_hint ?: rtx_head;
max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
__u8 sacked;
int segs;
- if (skb == tcp_send_head(sk))
- break;
-
if (tcp_pacing_check(sk))
break;
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += tcp_skb_pcount(skb);
- if (skb == tcp_write_queue_head(sk) &&
+ if (skb == rtx_head &&
icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
- if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+ if (!tskb && tcp_under_memory_pressure(sk))
+ tskb = skb_rb_last(&sk->tcp_rtx_queue);
+
+ if (tskb) {
coalesce:
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
- if (!tcp_send_head(sk)) {
+ if (tcp_write_queue_empty(sk)) {
/* This means tskb was already sent.
* Pretend we included the FIN on previous transmit.
* We need to set tp->snd_nxt to the value it would have
goto coalesce;
return;
}
+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
skb_reserve(skb, MAX_TCP_HEADER);
sk_forced_mem_schedule(sk, skb->truesize);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
/* Send it off. */
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
+
+ /* skb of trace_tcp_send_reset() keeps the skb that caused RST,
+ * skb here is different to the troublesome skb, so use NULL
+ */
+ trace_tcp_send_reset(sk, NULL);
}
/* Send a crossed SYN-ACK during socket establishment.
{
struct sk_buff *skb;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
- pr_debug("%s: wrong queue state\n", __func__);
+ pr_err("%s: wrong queue state\n", __func__);
return -EFAULT;
}
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
if (skb_cloned(skb)) {
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+ struct sk_buff *nskb;
+
+ tcp_skb_tsorted_save(skb) {
+ nskb = skb_copy(skb, GFP_ATOMIC);
+ } tcp_skb_tsorted_restore(skb);
if (!nskb)
return -ENOMEM;
- tcp_unlink_write_queue(skb, sk);
+ INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
__skb_header_release(nskb);
- __tcp_add_write_queue_head(sk, nskb);
- sk_wmem_free_skb(sk, skb);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
sk->sk_wmem_queued += nskb->truesize;
sk_mem_charge(sk, nskb->truesize);
skb = nskb;
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
- tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
- sizeof(*th);
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+ foc) + sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
if (rcv_wnd == 0)
rcv_wnd = dst_metric(dst, RTAX_INITRWND);
- tcp_select_initial_window(tcp_full_space(sk),
+ tcp_select_initial_window(sk, tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
tcb->end_seq += skb->len;
__skb_header_release(skb);
- __tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
tp->write_seq = tcb->end_seq;
int copied = copy_from_iter(skb_put(syn_data, space), space,
&fo->data->msg_iter);
if (unlikely(!copied)) {
+ tcp_skb_tsorted_anchor_cleanup(syn_data);
kfree_skb(syn_data);
goto fallback;
}
TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
if (!err) {
tp->syn_data = (fo->copied > 0);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
goto done;
}
- /* data was not sent, this is our new send_head */
- sk->sk_send_head = syn_data;
+ /* data was not sent, put it in write_queue */
+ __skb_queue_tail(&sk->sk_write_queue, syn_data);
tp->packets_out -= tcp_skb_pcount(syn_data);
fallback:
tp->retrans_stamp = tcp_time_stamp(tp);
tcp_connect_queue_skb(sk, buff);
tcp_ecn_send_syn(sk, buff);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
/* Send off SYN; include data in Fast Open. */
err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
skb->len > mss) {
seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
- if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+ if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, seg_size, mss, GFP_ATOMIC))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(skb, mss);
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
- if (tp->packets_out || !tcp_send_head(sk)) {
+ if (tp->packets_out || tcp_write_queue_empty(sk)) {
/* Cancel probe timer, if it is not required. */
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
if (unlikely(tcp_passive_fastopen(sk)))
tcp_sk(sk)->total_retrans++;
+ trace_tcp_retransmit_synack(sk, req);
}
return res;
}
if (likely(partial)) {
up->forward_deficit += size;
size = up->forward_deficit;
- if (size < (sk->sk_rcvbuf >> 2) &&
- !skb_queue_empty(&up->reader_queue))
+ if (size < (sk->sk_rcvbuf >> 2))
return;
} else {
size += up->forward_deficit;
*/
/* if we're overly short, let UDP handle it */
- encap_rcv = ACCESS_ONCE(up->encap_rcv);
+ encap_rcv = READ_ONCE(up->encap_rcv);
if (encap_rcv) {
int ret;
unlock_sock_fast(sk, slow);
if (static_key_false(&udp_encap_needed) && up->encap_type) {
void (*encap_destroy)(struct sock *sk);
- encap_destroy = ACCESS_ONCE(up->encap_destroy);
+ encap_destroy = READ_ONCE(up->encap_destroy);
if (encap_destroy)
encap_destroy(sk);
}
case NEXTHDR_DEST:
if (dir == XFRM_POLICY_OUT)
ipv6_rearrange_destopt(iph, exthdr.opth);
+ /* fall through */
case NEXTHDR_HOP:
if (!zero_out_mutable_opts(exthdr.opth)) {
net_dbg_ratelimited("overrun %sopts\n",
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
+ if (err == -ENOSPC)
err = NET_XMIT_DROP;
goto out_free;
}
case -EINPROGRESS:
goto error;
- case -EBUSY:
+ case -ENOSPC:
err = NET_XMIT_DROP;
break;
goto out;
}
- if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
- BUG();
+ ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
+ BUG_ON(ret);
ret = -EINVAL;
padlen = nexthdr[0];
static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
* decryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = skb_push(skb, 4);
+ struct ip_esp_hdr *esph = skb_push(skb, 4);
+
*seqhi = esph->spi;
esph->spi = esph->seq_no;
esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
- const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
- struct ip6_tnl *t;
- int rel_msg = 0;
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+ struct net *net = dev_net(skb->dev);
u8 rel_type = ICMPV6_DEST_UNREACH;
u8 rel_code = ICMPV6_ADDR_UNREACH;
- u8 tproto;
__u32 rel_info = 0;
- __u16 len;
+ struct ip6_tnl *t;
int err = -ENOENT;
+ int rel_msg = 0;
+ u8 tproto;
+ __u16 len;
/* If the packet doesn't contain the original IPv6 header we are
in trouble since we might need the source address for further
if (!t)
goto out;
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if (tproto != ipproto && tproto != 0)
goto out;
err = 0;
switch (*type) {
- __u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu;
+ __u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
}
break;
case ICMPV6_PKT_TOOBIG:
+ ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
+ sock_net_uid(net, NULL));
mtu = *info - offset;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- t->dev->mtu = mtu;
-
len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
if (len > mtu) {
rel_type = ICMPV6_PKT_TOOBIG;
rel_msg = 1;
}
break;
+ case NDISC_REDIRECT:
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
+ break;
}
*type = rel_type;
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- int rel_msg = 0;
- u8 rel_type = type;
- u8 rel_code = code;
__u32 rel_info = ntohl(info);
- int err;
- struct sk_buff *skb2;
const struct iphdr *eiph;
+ struct sk_buff *skb2;
+ int err, rel_msg = 0;
+ u8 rel_type = type;
+ u8 rel_code = code;
struct rtable *rt;
struct flowi4 fl4;
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED;
break;
- case NDISC_REDIRECT:
- rel_type = ICMP_REDIRECT;
- rel_code = ICMP_REDIR_HOST;
default:
return 0;
}
eiph = ip_hdr(skb2);
/* Try to guess incoming interface */
- rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
- eiph->saddr, 0,
- 0, 0,
- IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
+ 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
if (IS_ERR(rt))
goto out;
skb2->dev = rt->dst.dev;
+ ip_rt_put(rt);
/* route "incoming" packet */
if (rt->rt_flags & RTCF_LOCAL) {
- ip_rt_put(rt);
- rt = NULL;
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
- eiph->daddr, eiph->saddr,
- 0, 0,
- IPPROTO_IPIP,
- RT_TOS(eiph->tos), 0);
- if (IS_ERR(rt) ||
- rt->dst.dev->type != ARPHRD_TUNNEL) {
+ eiph->daddr, eiph->saddr, 0, 0,
+ IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
}
skb_dst_set(skb2, &rt->dst);
} else {
- ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
+ skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
+ rel_info);
}
- if (rel_type == ICMP_REDIRECT)
- skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- int rel_msg = 0;
+ __u32 rel_info = ntohl(info);
+ int err, rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
- __u32 rel_info = ntohl(info);
- int err;
err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
&rel_msg, &rel_info, offset);
if ((ipv6_addr_is_multicast(laddr) ||
likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
- likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
+ ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
+ likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
ret = 1;
}
return ret;
t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
if (t) {
- u8 tproto = ACCESS_ONCE(t->parms.proto);
+ u8 tproto = READ_ONCE(t->parms.proto);
if (tproto != ipproto && tproto != 0)
goto drop;
if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
- else if (!ipv6_addr_is_multicast(raddr) &&
+ else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+ !ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
u8 tproto;
int err;
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
ip6_tnl_addr_conflict(t, ipv6h))
return -1;
.priority = 1,
};
- static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
+ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct net_device *dev, *aux;
int h;
struct ip6_tnl *t;
- LIST_HEAD(list);
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &ip6_link_ops)
- unregister_netdevice_queue(dev, &list);
+ unregister_netdevice_queue(dev, list);
for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_queue(t->dev, list);
t = rtnl_dereference(t->next);
}
}
-
- unregister_netdevice_many(&list);
}
static int __net_init ip6_tnl_init_net(struct net *net)
return err;
}
- static void __net_exit ip6_tnl_exit_net(struct net *net)
+ static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
{
+ struct net *net;
+ LIST_HEAD(list);
+
rtnl_lock();
- ip6_tnl_destroy_tunnels(net);
+ list_for_each_entry(net, net_list, exit_list)
+ ip6_tnl_destroy_tunnels(net, &list);
+ unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit = ip6_tnl_exit_net,
+ .exit_batch = ip6_tnl_exit_batch_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
if (!sta->mesh)
goto free;
+ sta->mesh->plink_sta = sta;
spin_lock_init(&sta->mesh->plink_lock);
if (ieee80211_vif_is_mesh(&sdata->vif) &&
!sdata->u.mesh.user_mpm)
- init_timer(&sta->mesh->plink_timer);
+ timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
+ 0);
sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
}
#endif
return err;
}
+ static void
+ ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
+ {
+ struct ieee80211_local *local = sdata->local;
+ bool allow_p2p_go_ps = sdata->vif.p2p;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata ||
+ !test_sta_flag(sta, WLAN_STA_ASSOC))
+ continue;
+ if (!sta->sta.support_p2p_ps) {
+ allow_p2p_go_ps = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
+ sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
+ }
+ }
+
/*
* should be called with sta_mtx locked
* this function replaces the mutex lock
goto out_remove;
set_sta_flag(sta, WLAN_STA_INSERTED);
+
+ if (sta->sta_state >= IEEE80211_STA_ASSOC) {
+ ieee80211_recalc_min_chandef(sta->sdata);
+ if (!sta->sta.support_p2p_ps)
+ ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+ }
+
/* accept BA sessions now */
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
}
EXPORT_SYMBOL(ieee80211_sta_set_buffered);
- static void
- ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
- {
- struct ieee80211_local *local = sdata->local;
- bool allow_p2p_go_ps = sdata->vif.p2p;
- struct sta_info *sta;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
- if (sdata != sta->sdata ||
- !test_sta_flag(sta, WLAN_STA_ASSOC))
- continue;
- if (!sta->sta.support_p2p_ps) {
- allow_p2p_go_ps = false;
- break;
- }
- }
- rcu_read_unlock();
-
- if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
- sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
- }
- }
-
int sta_info_move_state(struct sta_info *sta,
enum ieee80211_sta_state new_state)
{
static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
{
- u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+ u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
if (rate == STA_STATS_RATE_INVALID)
return -EINVAL;
spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static void ip_vs_conn_expire(unsigned long data);
+static void ip_vs_conn_expire(struct timer_list *t);
/*
* Returns hash value for IPVS connection entry
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
- pr_err("%s(): request for already hashed, called from %pF\n",
+ pr_err("%s(): request for already hashed, called from %pS\n",
__func__, __builtin_return_address(0));
ret = 0;
}
static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
{
__ip_vs_conn_put(cp);
- ip_vs_conn_expire((unsigned long)cp);
+ ip_vs_conn_expire(&cp->timer);
}
/*
kmem_cache_free(ip_vs_conn_cachep, cp);
}
-static void ip_vs_conn_expire(unsigned long data)
+static void ip_vs_conn_expire(struct timer_list *t)
{
- struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+ struct ip_vs_conn *cp = from_timer(cp, t, timer);
struct netns_ipvs *ipvs = cp->ipvs;
/*
}
INIT_HLIST_NODE(&cp->c_list);
- setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+ timer_setup(&cp->timer, ip_vs_conn_expire, 0);
cp->ipvs = ipvs;
cp->af = p->af;
cp->daf = dest_af;
unsigned int hash;
if (svc->flags & IP_VS_SVC_F_HASHED) {
- pr_err("%s(): request for already hashed, called from %pF\n",
+ pr_err("%s(): request for already hashed, called from %pS\n",
__func__, __builtin_return_address(0));
return 0;
}
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
- pr_err("%s(): request for unhash flagged, called from %pF\n",
+ pr_err("%s(): request for unhash flagged, called from %pS\n",
__func__, __builtin_return_address(0));
return 0;
}
return 0;
}
-static void ip_vs_dest_trash_expire(unsigned long data)
+static void ip_vs_dest_trash_expire(struct timer_list *t)
{
- struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
+ struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
struct ip_vs_dest *dest, *next;
unsigned long now = jiffies;
seq_puts(seq,
" -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
} else {
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
char *sched_name = sched ? sched->name : "none";
+ if (svc->ipvs != ipvs)
+ return 0;
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
INIT_LIST_HEAD(&ipvs->dest_trash);
spin_lock_init(&ipvs->dest_trash_lock);
- setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
- (unsigned long) ipvs);
+ timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
atomic_set(&ipvs->conn_out_counter, 0);
case NL80211_CHAN_HT40MINUS:
cfg80211_chandef_create(chandef, chandef->chan,
chantype);
+ /* user input for center_freq is incorrect */
+ if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
+ chandef->center_freq1 != nla_get_u32(
+ info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+ return -EINVAL;
+ /* center_freq2 must be zero */
+ if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
+ nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+ return -EINVAL;
break;
default:
return -EINVAL;
}
}
+ static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
+ {
+ return reg_reload_regdb();
+ }
+
static int nl80211_get_mesh_config(struct sk_buff *skb,
struct genl_info *info)
{
return regulatory_pre_cac_allowed(wdev->wiphy);
}
+ static int
+ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
+ void *request, struct nlattr **attrs,
+ bool is_sched_scan)
+ {
+ u8 *mac_addr, *mac_addr_mask;
+ u32 *flags;
+ enum nl80211_feature_flags randomness_flag;
+
+ if (!attrs[NL80211_ATTR_SCAN_FLAGS])
+ return 0;
+
+ if (is_sched_scan) {
+ struct cfg80211_sched_scan_request *req = request;
+
+ randomness_flag = wdev ?
+ NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+ NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+ flags = &req->flags;
+ mac_addr = req->mac_addr;
+ mac_addr_mask = req->mac_addr_mask;
+ } else {
+ struct cfg80211_scan_request *req = request;
+
+ randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+ flags = &req->flags;
+ mac_addr = req->mac_addr;
+ mac_addr_mask = req->mac_addr_mask;
+ }
+
+ *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
+
+ if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+ !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+ return -EOPNOTSUPP;
+
+ if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+ int err;
+
+ if (!(wiphy->features & randomness_flag) ||
+ (wdev && wdev->current_bss))
+ return -EOPNOTSUPP;
+
+ err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
+ if (err)
+ return err;
+ }
+
+ if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
}
- if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
- request->flags = nla_get_u32(
- info->attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- if (!(wiphy->features &
- NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (wdev->current_bss) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- err = nl80211_parse_random_mac(info->attrs,
- request->mac_addr,
- request->mac_addr_mask);
- if (err)
- goto out_free;
- }
- }
+ err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
+ false);
+ if (err)
+ goto out_free;
request->no_cck =
nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
request->ie_len);
}
- if (attrs[NL80211_ATTR_SCAN_FLAGS]) {
- request->flags = nla_get_u32(
- attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
-
- if (!wdev) /* must be net-detect */
- flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
-
- if (!(wiphy->features & flg)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (wdev && wdev->current_bss) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- err = nl80211_parse_random_mac(attrs, request->mac_addr,
- request->mac_addr_mask);
- if (err)
- goto out_free;
- }
- }
+ err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+ if (err)
+ goto out_free;
if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
request->delay =
if (info->attrs[NL80211_ATTR_USE_MFP]) {
connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+ if (connect.mfp == NL80211_MFP_OPTIONAL &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_MFP_OPTIONAL))
+ return -EOPNOTSUPP;
+
if (connect.mfp != NL80211_MFP_REQUIRED &&
- connect.mfp != NL80211_MFP_NO)
+ connect.mfp != NL80211_MFP_NO &&
+ connect.mfp != NL80211_MFP_OPTIONAL)
return -EINVAL;
} else {
connect.mfp = NL80211_MFP_NO;
.policy = nl80211_policy,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = NL80211_CMD_RELOAD_REGDB,
+ .doit = nl80211_reload_regdb,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
{
.cmd = NL80211_CMD_GET_MESH_CONFIG,
.doit = nl80211_get_mesh_config,
info->req_ie)) ||
(info->resp_ie &&
nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
- info->resp_ie)) ||
- (info->authorized &&
- nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
+ info->resp_ie)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
NL80211_MCGRP_MLME, gfp);
return;
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ nlmsg_free(msg);
+ }
+
+ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, const u8 *bssid)
+ {
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_MLME, GFP_KERNEL);
+ return;
+
nla_put_failure:
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
- u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
+ u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);
if (!nlportid)
return false;