Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 15 Nov 2017 19:56:19 +0000 (11:56 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 15 Nov 2017 19:56:19 +0000 (11:56 -0800)
diff --combined MAINTAINERS

index 7e9c887ad951215464705d112377e4e2266a9d76,29aa89a1837bcd089db9388180cc19fb68324562..16e1e6dc89f253338e8307fc9ff296acbb98b8d1
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -527,6 -527,11 +527,6 @@@ W:        http://ez.analog.com/community/linux
   S:    Supported
   F:    drivers/input/misc/adxl34x.c
   
- -AEDSP16 DRIVER
- -M:    Riccardo Facchetti <fizban@tin.it>
- -S:    Maintained
- -F:    sound/oss/aedsp16.c
- -
   AF9013 MEDIA DRIVER
   M:    Antti Palosaari <crope@iki.fi>
   L:    linux-media@vger.kernel.org
@@@ -695,9 -700,9 +695,9 @@@ F: include/linux/altera_uart.
   F:    include/linux/altera_jtaguart.h
   
   AMAZON ETHERNET DRIVERS
- M:    Netanel Belgazal <netanel@annapurnalabs.com>
- R:    Saeed Bishara <saeed@annapurnalabs.com>
- R:    Zorik Machulsky <zorik@annapurnalabs.com>
+ M:    Netanel Belgazal <netanel@amazon.com>
+ R:    Saeed Bishara <saeedb@amazon.com>
+ R:    Zorik Machulsky <zorik@amazon.com>
   L:    netdev@vger.kernel.org
   S:    Supported
   F:    Documentation/networking/ena.txt
@@@ -2024,7 -2029,6 +2024,7 @@@ M:      Masahiro Yamada <yamada.masahiro@soc
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
   S:    Maintained
+ +F:    Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
   F:    arch/arm/boot/dts/uniphier*
   F:    arch/arm/include/asm/hardware/cache-uniphier.h
   F:    arch/arm/mach-uniphier/
@@@ -2032,7 -2036,6 +2032,7 @@@ F:      arch/arm/mm/cache-uniphier.
   F:    arch/arm64/boot/dts/socionext/
   F:    drivers/bus/uniphier-system-bus.c
   F:    drivers/clk/uniphier/
+ +F:    drivers/gpio/gpio-uniphier.c
   F:    drivers/i2c/busses/i2c-uniphier*
   F:    drivers/irqchip/irq-uniphier-aidet.c
   F:    drivers/pinctrl/uniphier/
@@@ -2244,7 -2247,7 +2244,7 @@@ F:      include/linux/dmaengine.
   F:    include/linux/async_tx.h
   
   AT24 EEPROM DRIVER
- -M:    Wolfram Sang <wsa@the-dreams.de>
+ +M:    Bartosz Golaszewski <brgl@bgdev.pl>
   L:    linux-i2c@vger.kernel.org
   S:    Maintained
   F:    drivers/misc/eeprom/at24.c
@@@ -2559,12 -2562,10 +2559,12 @@@ S:   Maintaine
   F:    drivers/net/hamradio/baycom*
   
   BCACHE (BLOCK LAYER CACHE)
+ +M:    Michael Lyle <mlyle@lyle.org>
   M:    Kent Overstreet <kent.overstreet@gmail.com>
   L:    linux-bcache@vger.kernel.org
   W:    http://bcache.evilpiepirate.org
- -S:    Orphan
+ +C:    irc://irc.oftc.net/bcache
+ +S:    Maintained
   F:    drivers/md/bcache/
   
   BDISP ST MEDIA DRIVER
@@@ -2712,6 -2713,7 +2712,7 @@@ L:      linux-kernel@vger.kernel.or
   S:    Supported
   F:    arch/x86/net/bpf_jit*
   F:    Documentation/networking/filter.txt
+ F:    Documentation/bpf/
   F:    include/linux/bpf*
   F:    include/linux/filter.h
   F:    include/uapi/linux/bpf*
@@@ -2724,7 -2726,7 +2725,7 @@@ F:      net/core/filter.
   F:    net/sched/act_bpf.c
   F:    net/sched/cls_bpf.c
   F:    samples/bpf/
- F:    tools/net/bpf*
+ F:    tools/bpf/
   F:    tools/testing/selftests/bpf/
   
   BROADCOM B44 10/100 ETHERNET DRIVER
@@@ -2895,14 -2897,8 +2896,15 @@@ S:    Supporte
   F:    drivers/gpio/gpio-brcmstb.c
   F:    Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
   
+ +BROADCOM BRCMSTB USB2 and USB3 PHY DRIVER
+ +M:    Al Cooper <alcooperx@gmail.com>
+ +L:    linux-kernel@vger.kernel.org
+ +L:    bcm-kernel-feedback-list@broadcom.com
+ +S:    Maintained
+ +F:    drivers/phy/broadcom/phy-brcm-usb*
+ +
   BROADCOM GENET ETHERNET DRIVER
+ M:    Doug Berger <opendmb@gmail.com>
   M:    Florian Fainelli <f.fainelli@gmail.com>
   L:    netdev@vger.kernel.org
   S:    Supported
@@@ -3088,7 -3084,6 +3090,6 @@@ F:      arch/c6x
   
   CA8210 IEEE-802.15.4 RADIO DRIVER
   M:    Harry Morris <h.morris@cascoda.com>
- M:    linuxdev@cascoda.com
   L:    linux-wpan@vger.kernel.org
   W:    https://github.com/Cascoda/ca8210-linux.git
   S:    Maintained
@@@ -3335,17 -3330,22 +3336,22 @@@ S:   Maintaine
   F:    drivers/auxdisplay/cfag12864bfb.c
   F:    include/linux/cfag12864b.h
   
- CFG80211 and NL80211
+ 802.11 (including CFG80211/NL80211)
   M:    Johannes Berg <johannes@sipsolutions.net>
   L:    linux-wireless@vger.kernel.org
   W:    http://wireless.kernel.org/
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
   S:    Maintained
+ F:    net/wireless/
   F:    include/uapi/linux/nl80211.h
+ F:    include/linux/ieee80211.h
+ F:    include/net/wext.h
   F:    include/net/cfg80211.h
- F:    net/wireless/*
- X:    net/wireless/wext*
+ F:    include/net/iw_handler.h
+ F:    include/net/ieee80211_radiotap.h
+ F:    Documentation/driver-api/80211/cfg80211.rst
+ F:    Documentation/networking/regulatory.txt
   
   CHAR and MISC DRIVERS
   M:    Arnd Bergmann <arnd@arndb.de>
@@@ -3421,7 -3421,7 +3427,7 @@@ F:      drivers/scsi/snic
   CISCO VIC ETHERNET NIC DRIVER
   M:    Christian Benvenuti <benve@cisco.com>
   M:    Govindarajulu Varadarajan <_govind@gmx.com>
- M:    Neel Patel <neepatel@cisco.com>
+ M:    Parvi Kaustubhi <pkaustub@cisco.com>
   S:    Supported
   F:    drivers/net/ethernet/cisco/enic/
   
@@@ -3450,8 -3450,7 +3456,8 @@@ M:      Thomas Gleixner <tglx@linutronix.de
   L:    linux-kernel@vger.kernel.org
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
   S:    Supported
- -F:    drivers/clocksource
+ +F:    drivers/clocksource/
+ +F:    Documentation/devicetree/bindings/timer/
   
   CMPC ACPI DRIVER
   M:    Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
@@@ -3472,7 -3471,7 +3478,7 @@@ COCCINELLE/Semantic Patches (SmPL
   M:    Julia Lawall <Julia.Lawall@lip6.fr>
   M:    Gilles Muller <Gilles.Muller@lip6.fr>
   M:    Nicolas Palix <nicolas.palix@imag.fr>
- -M:    Michal Marek <mmarek@suse.com>
+ +M:    Michal Marek <michal.lkml@markovi.net>
   L:    cocci@systeme.lip6.fr (moderated for non-subscribers)
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
   W:    http://coccinelle.lip6.fr/
@@@ -3643,8 -3642,6 +3649,8 @@@ F:      drivers/cpufreq/arm_big_little_dt.
   
   CPU POWER MONITORING SUBSYSTEM
   M:    Thomas Renninger <trenn@suse.com>
+ +M:    Shuah Khan <shuahkh@osg.samsung.com>
+ +M:    Shuah Khan <shuah@kernel.org>
   L:    linux-pm@vger.kernel.org
   S:    Maintained
   F:    tools/power/cpupower/
@@@ -4100,8 -4097,6 +4106,8 @@@ T:      git git://git.kernel.org/pub/scm/lin
   T:    quilt http://people.redhat.com/agk/patches/linux/editing/
   S:    Maintained
   F:    Documentation/device-mapper/
+ +F:    drivers/md/Makefile
+ +F:    drivers/md/Kconfig
   F:    drivers/md/dm*
   F:    drivers/md/persistent-data/
   F:    include/linux/device-mapper.h
@@@ -4245,7 -4240,7 +4251,7 @@@ S:      Maintaine
   F:    drivers/dma/
   F:    include/linux/dmaengine.h
   F:    Documentation/devicetree/bindings/dma/
- -F:    Documentation/dmaengine/
+ +F:    Documentation/driver-api/dmaengine/
   T:    git git://git.infradead.org/users/vkoul/slave-dma.git
   
   DMA MAPPING HELPERS
@@@ -4917,19 -4912,13 +4923,19 @@@ L:   linux-edac@vger.kernel.or
   S:    Maintained
   F:    drivers/edac/highbank*
   
- -EDAC-CAVIUM
+ +EDAC-CAVIUM OCTEON
   M:    Ralf Baechle <ralf@linux-mips.org>
   M:    David Daney <david.daney@cavium.com>
   L:    linux-edac@vger.kernel.org
   L:    linux-mips@linux-mips.org
   S:    Supported
   F:    drivers/edac/octeon_edac*
+ +
+ +EDAC-CAVIUM THUNDERX
+ +M:    David Daney <david.daney@cavium.com>
+ +M:    Jan Glauber <jglauber@cavium.com>
+ +L:    linux-edac@vger.kernel.org
+ +S:    Supported
   F:    drivers/edac/thunderx_edac*
   
   EDAC-CORE
@@@ -5230,7 -5219,8 +5236,7 @@@ F:      fs/ext4
   
   Extended Verification Module (EVM)
   M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
- -L:    linux-ima-devel@lists.sourceforge.net
- -L:    linux-security-module@vger.kernel.org
+ +L:    linux-integrity@vger.kernel.org
   S:    Supported
   F:    security/integrity/evm/
   
@@@ -5485,7 -5475,7 +5491,7 @@@ F:      include/uapi/linux/fb.
   
   FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
   M:    Horia Geantă <horia.geanta@nxp.com>
- -M:    Dan Douglass <dan.douglass@nxp.com>
+ +M:    Aymen Sghaier <aymen.sghaier@nxp.com>
   L:    linux-crypto@vger.kernel.org
   S:    Maintained
   F:    drivers/crypto/caam/
@@@ -5665,7 -5655,6 +5671,7 @@@ T:      git git://git.kernel.org/pub/scm/lin
   S:    Supported
   F:    fs/crypto/
   F:    include/linux/fscrypt*.h
+ +F:    Documentation/filesystems/fscrypt.rst
   
   FUJITSU FR-V (FRV) PORT
   S:    Orphan
@@@ -6259,13 -6248,6 +6265,13 @@@ S:    Maintaine
   F:    drivers/net/ethernet/hisilicon/
   F:    Documentation/devicetree/bindings/net/hisilicon*.txt
   
+ +HISILICON PMU DRIVER
+ +M:    Shaokun Zhang <zhangshaokun@hisilicon.com>
+ +W:    http://www.hisilicon.com
+ +S:    Supported
+ +F:    drivers/perf/hisilicon
+ +F:    Documentation/perf/hisi-pmu.txt
+ +
   HISILICON ROCE DRIVER
   M:    Lijun Ou <oulijun@huawei.com>
   M:    Wei Hu(Xavier) <xavier.huwei@huawei.com>
@@@ -6865,7 -6847,9 +6871,7 @@@ L:      linux-crypto@vger.kernel.or
   INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
   M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
   M:    Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
- -L:    linux-ima-devel@lists.sourceforge.net
- -L:    linux-ima-user@lists.sourceforge.net
- -L:    linux-security-module@vger.kernel.org
+ +L:    linux-integrity@vger.kernel.org
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
   S:    Supported
   F:    security/integrity/ima/
@@@ -7455,8 -7439,10 +7461,8 @@@ F:     mm/kasan
   F:    scripts/Makefile.kasan
   
   KCONFIG
- -M:    "Yann E. MORIN" <yann.morin.1998@free.fr>
   L:    linux-kbuild@vger.kernel.org
- -T:    git git://gitorious.org/linux-kconfig/linux-kconfig
- -S:    Maintained
+ +S:    Orphan
   F:    Documentation/kbuild/kconfig-language.txt
   F:    scripts/kconfig/
   
@@@ -7485,7 -7471,7 +7491,7 @@@ F:      fs/autofs4
   
   KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
   M:    Masahiro Yamada <yamada.masahiro@socionext.com>
- -M:    Michal Marek <mmarek@suse.com>
+ +M:    Michal Marek <michal.lkml@markovi.net>
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
   L:    linux-kbuild@vger.kernel.org
   S:    Maintained
@@@ -7646,7 -7632,8 +7652,7 @@@ F:      kernel/kexec
   
   KEYS-ENCRYPTED
   M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
- -M:    David Safford <safford@us.ibm.com>
- -L:    linux-security-module@vger.kernel.org
+ +L:    linux-integrity@vger.kernel.org
   L:    keyrings@vger.kernel.org
   S:    Supported
   F:    Documentation/security/keys/trusted-encrypted.rst
@@@ -7654,8 -7641,9 +7660,8 @@@ F:      include/keys/encrypted-type.
   F:    security/keys/encrypted-keys/
   
   KEYS-TRUSTED
- -M:    David Safford <safford@us.ibm.com>
   M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
- -L:    linux-security-module@vger.kernel.org
+ +L:    linux-integrity@vger.kernel.org
   L:    keyrings@vger.kernel.org
   S:    Supported
   F:    Documentation/security/keys/trusted-encrypted.rst
@@@ -8231,6 -8219,7 +8237,7 @@@ F:      Documentation/networking/mac80211-in
   F:    include/net/mac80211.h
   F:    net/mac80211/
   F:    drivers/net/wireless/mac80211_hwsim.[ch]
+ F:    Documentation/networking/mac80211_hwsim/README
   
   MAILBOX API
   M:    Jassi Brar <jassisinghbrar@gmail.com>
@@@ -9222,6 -9211,12 +9229,6 @@@ F:     include/linux/dt-bindings/mux
   F:    include/linux/mux/
   F:    drivers/mux/
   
- -MULTISOUND SOUND DRIVER
- -M:    Andrew Veliath <andrewtv@usa.net>
- -S:    Maintained
- -F:    Documentation/sound/oss/MultiSound
- -F:    sound/oss/msnd*
- -
   MULTITECH MULTIPORT CARD (ISICOM)
   S:    Orphan
   F:    drivers/tty/isicom.c
@@@ -9425,6 -9420,7 +9432,7 @@@ M:      Florian Fainelli <f.fainelli@gmail.c
   S:    Maintained
   F:    net/dsa/
   F:    include/net/dsa.h
+ F:    include/linux/dsa/
   F:    drivers/net/dsa/
   
   NETWORKING [GENERAL]
@@@ -9445,8 -9441,8 +9453,8 @@@ F:      include/uapi/linux/in.
   F:    include/uapi/linux/net.h
   F:    include/uapi/linux/netdevice.h
   F:    include/uapi/linux/net_namespace.h
- F:    tools/net/
   F:    tools/testing/selftests/net/
+ F:    lib/net_utils.c
   F:    lib/random32.c
   
   NETWORKING [IPSEC]
@@@ -10048,11 -10044,7 +10056,11 @@@ T: git git://github.com/openrisc/linux.
   L:    openrisc@lists.librecores.org
   W:    http://openrisc.io
   S:    Maintained
+ +F:    Documentation/devicetree/bindings/openrisc/
+ +F:    Documentation/openrisc/
   F:    arch/openrisc/
+ +F:    drivers/irqchip/irq-ompic.c
+ +F:    drivers/irqchip/irq-or1k-*
   
   OPENVSWITCH
   M:    Pravin Shelar <pshelar@nicira.com>
@@@ -10070,7 -10062,7 +10078,7 @@@ M:   Stephen Boyd <sboyd@codeaurora.org
   L:    linux-pm@vger.kernel.org
   S:    Maintained
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git
- -F:    drivers/base/power/opp/
+ +F:    drivers/opp/
   F:    include/linux/pm_opp.h
   F:    Documentation/power/opp.txt
   F:    Documentation/devicetree/bindings/opp/
@@@ -11061,6 -11053,7 +11069,6 @@@ F:   drivers/mtd/nand/pxa3xx_nand.
   
   QAT DRIVER
   M:    Giovanni Cabiddu <giovanni.cabiddu@intel.com>
- -M:    Salvatore Benedetto <salvatore.benedetto@intel.com>
   L:    qat-linux@intel.com
   S:    Supported
   F:    drivers/crypto/qat/
@@@ -11520,6 -11513,7 +11528,7 @@@ T:   git git://git.kernel.org/pub/scm/lin
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
   S:    Maintained
   F:    Documentation/rfkill.txt
+ F:    Documentation/ABI/stable/sysfs-class-rfkill
   F:    net/rfkill/
   
   RHASHTABLE
@@@ -11541,16 -11535,6 +11550,16 @@@ S: Maintaine
   F:    drivers/mtd/nand/r852.c
   F:    drivers/mtd/nand/r852.h
   
+ +RISC-V ARCHITECTURE
+ +M:    Palmer Dabbelt <palmer@sifive.com>
+ +M:    Albert Ou <albert@sifive.com>
+ +L:    patches@groups.riscv.org
+ +T:    git https://github.com/riscv/riscv-linux
+ +S:    Supported
+ +F:    arch/riscv/
+ +K:    riscv
+ +N:    riscv
+ +
   ROCCAT DRIVERS
   M:    Stefan Achatz <erazor_de@users.sourceforge.net>
   W:    http://sourceforge.net/projects/roccat/
@@@ -11803,7 -11787,7 +11812,7 @@@ L:   linux-crypto@vger.kernel.or
   L:    linux-samsung-soc@vger.kernel.org
   S:    Maintained
   F:    drivers/crypto/exynos-rng.c
- -F:    Documentation/devicetree/bindings/rng/samsung,exynos-rng4.txt
+ +F:    Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
   
   SAMSUNG FRAMEBUFFER DRIVER
   M:    Jingoo Han <jingoohan1@gmail.com>
@@@ -12086,15 -12070,10 +12095,15 @@@ L:        linux-mmc@vger.kernel.or
   S:    Maintained
   F:    drivers/mmc/host/sdhci-spear.c
   
+ +SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) TI OMAP DRIVER
+ +M:    Kishon Vijay Abraham I <kishon@ti.com>
+ +L:    linux-mmc@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/mmc/host/sdhci-omap.c
+ +
   SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
   M:    Scott Bauer <scott.bauer@intel.com>
   M:    Jonathan Derrick <jonathan.derrick@intel.com>
- -M:    Rafael Antognolli <rafael.antognolli@intel.com>
   L:    linux-block@vger.kernel.org
   S:    Supported
   F:    block/sed*
@@@ -12495,10 -12474,7 +12504,10 @@@ M: Shaohua Li <shli@kernel.org
   L:    linux-raid@vger.kernel.org
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
   S:    Supported
- -F:    drivers/md/
+ +F:    drivers/md/Makefile
+ +F:    drivers/md/Kconfig
+ +F:    drivers/md/md*
+ +F:    drivers/md/raid*
   F:    include/linux/raid/
   F:    include/uapi/linux/raid/
   
@@@ -12951,16 -12927,9 +12960,16 @@@ F: arch/arc/plat-axs10
   F:    arch/arc/boot/dts/ax*
   F:    Documentation/devicetree/bindings/arc/axs10*
   
+ +SYNOPSYS DESIGNWARE APB GPIO DRIVER
+ +M:    Hoan Tran <hotran@apm.com>
+ +L:    linux-gpio@vger.kernel.org
+ +S:    Maintained
+ +F:    drivers/gpio/gpio-dwapb.c
+ +F:    Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
+ +
   SYNOPSYS DESIGNWARE DMAC DRIVER
   M:    Viresh Kumar <vireshk@kernel.org>
- -M:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ +R:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
   S:    Maintained
   F:    include/linux/dma/dw.h
   F:    include/linux/platform_data/dma-dw.h
@@@ -13343,6 -13312,15 +13352,15 @@@ M: Mika Westerberg <mika.westerberg@lin
   M:    Yehezkel Bernat <yehezkel.bernat@intel.com>
   S:    Maintained
   F:    drivers/thunderbolt/
+ F:    include/linux/thunderbolt.h
+ 
+ THUNDERBOLT NETWORK DRIVER
+ M:    Michael Jamet <michael.jamet@intel.com>
+ M:    Mika Westerberg <mika.westerberg@linux.intel.com>
+ M:    Yehezkel Bernat <yehezkel.bernat@intel.com>
+ L:    netdev@vger.kernel.org
+ S:    Maintained
+ F:    drivers/net/thunderbolt.c
   
   THUNDERX GPIO DRIVER
   M:    David Daney <david.daney@cavium.com>
@@@ -13799,7 -13777,7 +13817,7 @@@ UDRAW TABLE
   M:    Bastien Nocera <hadess@hadess.net>
   L:    linux-input@vger.kernel.org
   S:    Maintained
- -F:    drivers/hid/hid-udraw.c
+ +F:    drivers/hid/hid-udraw-ps3.c
   
   UFS FILESYSTEM
   M:    Evgeniy Dushistov <dushistov@mail.ru>
@@@ -14322,12 -14300,15 +14340,15 @@@ S:        Maintaine
   F:    include/linux/virtio_vsock.h
   F:    include/uapi/linux/virtio_vsock.h
   F:    include/uapi/linux/vsockmon.h
+ F:    include/uapi/linux/vm_sockets_diag.h
+ F:    net/vmw_vsock/diag.c
   F:    net/vmw_vsock/af_vsock_tap.c
   F:    net/vmw_vsock/virtio_transport_common.c
   F:    net/vmw_vsock/virtio_transport.c
   F:    drivers/net/vsockmon.c
   F:    drivers/vhost/vsock.c
   F:    drivers/vhost/vsock.h
+ F:    tools/testing/vsock/
   
   VIRTIO CONSOLE DRIVER
   M:    Amit Shah <amit@kernel.org>
@@@ -14368,7 -14349,6 +14389,7 @@@ L:   virtualization@lists.linux-foundatio
   L:    kvm@vger.kernel.org
   S:    Supported
   F:    drivers/s390/virtio/
+ +F:    arch/s390/include/uapi/asm/virtio-ccw.h
   
   VIRTIO GPU DRIVER
   M:    David Airlie <airlied@linux.ie>
@@@ -14591,7 -14571,6 +14612,6 @@@ L:   wil6210@qca.qualcomm.co
   S:    Supported
   W:    http://wireless.kernel.org/en/users/Drivers/wil6210
   F:    drivers/net/wireless/ath/wil6210/
- F:    include/uapi/linux/wil6210_uapi.h
   
   WIMAX STACK
   M:    Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
@@@ -14642,7 -14621,6 +14662,7 @@@ F:   Documentation/devicetree/bindings/ex
   F:    Documentation/devicetree/bindings/regulator/arizona-regulator.txt
   F:    Documentation/devicetree/bindings/mfd/arizona.txt
   F:    Documentation/devicetree/bindings/mfd/wm831x.txt
+ +F:    Documentation/devicetree/bindings/sound/wlf,arizona.txt
   F:    arch/arm/mach-s3c64xx/mach-crag6410*
   F:    drivers/clk/clk-wm83*.c
   F:    drivers/extcon/extcon-arizona.c
diff --combined drivers/atm/idt77105.c

index 57af9fd198e4e756b4646e5153f3f51b6b741626,d781b3f87693855b0b8d07634ab07d11f868669b..909744eb7bab419eec2dc71e2c79c87231812ce3
--- 1/drivers/atm/idt77105.c
--- 2/drivers/atm/idt77105.c
+++ b/drivers/atm/idt77105.c
@@@ -49,8 -49,8 +49,8 @@@ static void idt77105_stats_timer_func(u
   static void idt77105_restart_timer_func(unsigned long);
   
   
- -static DEFINE_TIMER(stats_timer, idt77105_stats_timer_func, 0, 0);
- -static DEFINE_TIMER(restart_timer, idt77105_restart_timer_func, 0, 0);
+ +static DEFINE_TIMER(stats_timer, idt77105_stats_timer_func);
+ +static DEFINE_TIMER(restart_timer, idt77105_restart_timer_func);
   static int start_timer = 1;
   static struct idt77105_priv *idt77105_all = NULL;
   
@@@ -306,11 -306,9 +306,9 @@@ static int idt77105_start(struct atm_de
         if (start_timer) {
                 start_timer = 0;
                   
-               setup_timer(&stats_timer, idt77105_stats_timer_func, 0UL);
                 stats_timer.expires = jiffies+IDT77105_STATS_TIMER_PERIOD;
                 add_timer(&stats_timer);
                   
-               setup_timer(&restart_timer, idt77105_restart_timer_func, 0UL);
                 restart_timer.expires = jiffies+IDT77105_RESTART_TIMER_PERIOD;
                 add_timer(&restart_timer);
         }
diff --combined drivers/atm/iphase.c

index ad6b582c268e1c2dbc6f20051eb49170b18dd8be,a785c6e697579579e2009a1a1cdc32eeae5bcca4..12f646760b6827e3ddf8a63290cfb22a08e51d0b
--- 1/drivers/atm/iphase.c
--- 2/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@@ -76,7 -76,7 +76,7 @@@ static IADEV *ia_dev[8]
   static struct atm_dev *_ia_dev[8];
   static int iadev_count;
   static void ia_led_timer(unsigned long arg);
- -static DEFINE_TIMER(ia_timer, ia_led_timer, 0, 0);
+ +static DEFINE_TIMER(ia_timer, ia_led_timer);
   static int IA_TX_BUF = DFL_TX_BUFFERS, IA_TX_BUF_SZ = DFL_TX_BUF_SZ;
   static int IA_RX_BUF = DFL_RX_BUFFERS, IA_RX_BUF_SZ = DFL_RX_BUF_SZ;
   static uint IADebugFlag = /* IF_IADBG_ERR | IF_IADBG_CBR| IF_IADBG_INIT_ADAPTER
@@@ -880,7 -880,7 +880,7 @@@ static void ia_phy_write(struct iadev_p
   
   static void ia_suni_pm7345_init_ds3(struct iadev_priv *iadev)
   {
-       static const struct ia_reg suni_ds3_init [] = {
+       static const struct ia_reg suni_ds3_init[] = {
                 { SUNI_DS3_FRM_INTR_ENBL,       0x17 },
                 { SUNI_DS3_FRM_CFG,             0x01 },
                 { SUNI_DS3_TRAN_CFG,            0x01 },
@@@ -898,7 -898,7 +898,7 @@@
   
   static void ia_suni_pm7345_init_e3(struct iadev_priv *iadev)
   {
-       static const struct ia_reg suni_e3_init [] = {
+       static const struct ia_reg suni_e3_init[] = {
                 { SUNI_E3_FRM_FRAM_OPTIONS,             0x04 },
                 { SUNI_E3_FRM_MAINT_OPTIONS,            0x20 },
                 { SUNI_E3_FRM_FRAM_INTR_ENBL,           0x1d },
@@@ -918,7 -918,7 +918,7 @@@
   
   static void ia_suni_pm7345_init(struct iadev_priv *iadev)
   {
-       static const struct ia_reg suni_init [] = {
+       static const struct ia_reg suni_init[] = {
                 /* Enable RSOP loss of signal interrupt. */
                 { SUNI_INTR_ENBL,               0x28 },
                 /* Clear error counters. */
diff --combined drivers/net/bonding/bond_main.c

index 08a4f57cf40966d1ca45cb51c1b8131aa80d65f2,99a3b0cd5bd690e04cec718739920f76890f3a8f..c669554d70bb7c7ba2fe3091ed1c58bd3026229f
--- 1/drivers/net/bonding/bond_main.c
--- 2/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@@ -1167,7 -1167,7 +1167,7 @@@ static rx_handler_result_t bond_handle_
         slave = bond_slave_get_rcu(skb->dev);
         bond = slave->bond;
   
- -      recv_probe = ACCESS_ONCE(bond->recv_probe);
+ +      recv_probe = READ_ONCE(bond->recv_probe);
         if (recv_probe) {
                 ret = recv_probe(skb, bond, slave);
                 if (ret == RX_HANDLER_CONSUMED) {
@@@ -1217,25 -1217,21 +1217,21 @@@ static enum netdev_lag_tx_type bond_lag
         }
   }
   
- static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave)
+ static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave,
+                                     struct netlink_ext_ack *extack)
   {
         struct netdev_lag_upper_info lag_upper_info;
-       int err;
   
         lag_upper_info.tx_type = bond_lag_tx_type(bond);
-       err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
-                                          &lag_upper_info);
-       if (err)
-               return err;
-       rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
-       return 0;
+ 
+       return netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
+                                           &lag_upper_info, extack);
   }
   
   static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
   {
         netdev_upper_dev_unlink(slave->dev, bond->dev);
         slave->dev->flags &= ~IFF_SLAVE;
-       rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
   }
   
   static struct slave *bond_alloc_slave(struct bonding *bond)
@@@ -1328,7 -1324,8 +1324,8 @@@ void bond_lower_state_changed(struct sl
   }
   
   /* enslave device <slave> to bond device <master> */
- int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+                struct netlink_ext_ack *extack)
   {
         struct bonding *bond = netdev_priv(bond_dev);
         const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
@@@ -1346,12 -1343,14 +1343,14 @@@
   
         /* already in-use? */
         if (netdev_is_rx_handler_busy(slave_dev)) {
+               NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved");
                 netdev_err(bond_dev,
                            "Error: Device is in use and cannot be enslaved\n");
                 return -EBUSY;
         }
   
         if (bond_dev == slave_dev) {
+               NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself.");
                 netdev_err(bond_dev, "cannot enslave bond to itself.\n");
                 return -EPERM;
         }
@@@ -1362,6 -1361,7 +1361,7 @@@
                 netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n",
                            slave_dev->name);
                 if (vlan_uses_dev(bond_dev)) {
+                       NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond");
                         netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",
                                    slave_dev->name, bond_dev->name);
                         return -EPERM;
@@@ -1381,6 -1381,7 +1381,7 @@@
          * enslaving it; the old ifenslave will not.
          */
         if (slave_dev->flags & IFF_UP) {
+               NL_SET_ERR_MSG(extack, "Device can not be enslaved while up");
                 netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n",
                            slave_dev->name);
                 return -EPERM;
@@@ -1421,6 -1422,7 +1422,7 @@@
                                                  bond_dev);
                 }
         } else if (bond_dev->type != slave_dev->type) {
+               NL_SET_ERR_MSG(extack, "Device type is different from other slaves");
                 netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n",
                            slave_dev->name, slave_dev->type, bond_dev->type);
                 return -EINVAL;
@@@ -1428,6 -1430,7 +1430,7 @@@
   
         if (slave_dev->type == ARPHRD_INFINIBAND &&
             BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+               NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves");
                 netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n",
                             slave_dev->type);
                 res = -EOPNOTSUPP;
@@@ -1443,6 -1446,7 +1446,7 @@@
                                 bond->params.fail_over_mac = BOND_FOM_ACTIVE;
                                 netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n");
                         } else {
+                               NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
                                 netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
                                 res = -EOPNOTSUPP;
                                 goto err_undo_flags;
@@@ -1709,7 -1713,7 +1713,7 @@@
                 goto err_detach;
         }
   
-       res = bond_master_upper_dev_link(bond, new_slave);
+       res = bond_master_upper_dev_link(bond, new_slave, extack);
         if (res) {
                 netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res);
                 goto err_unregister;
@@@ -2492,7 -2496,8 +2496,8 @@@ int bond_arp_rcv(const struct sk_buff *
         struct slave *curr_active_slave, *curr_arp_slave;
         unsigned char *arp_ptr;
         __be32 sip, tip;
-       int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+       int is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+       unsigned int alen;
   
         if (!slave_do_arp_validate(bond, slave)) {
                 if ((slave_do_arp_validate_only(bond) && is_arp) ||
@@@ -3073,7 -3078,16 +3078,16 @@@ static int bond_slave_netdev_event(unsi
                 break;
         case NETDEV_UP:
         case NETDEV_CHANGE:
-               bond_update_speed_duplex(slave);
+               /* For 802.3ad mode only:
+                * Getting invalid Speed/Duplex values here will put slave
+                * in weird state. So mark it as link-down for the time
+                * being and let link-monitoring (miimon) set it right when
+                * correct speeds/duplex are available.
+                */
+               if (bond_update_speed_duplex(slave) &&
+                   BOND_MODE(bond) == BOND_MODE_8023AD)
+                       slave->link = BOND_LINK_DOWN;
+ 
                 if (BOND_MODE(bond) == BOND_MODE_8023AD)
                         bond_3ad_adapter_speed_duplex_changed(slave);
                 /* Fallthrough */
@@@ -3483,7 -3497,7 +3497,7 @@@ static int bond_do_ioctl(struct net_dev
         switch (cmd) {
         case BOND_ENSLAVE_OLD:
         case SIOCBONDENSLAVE:
-               res = bond_enslave(bond_dev, slave_dev);
+               res = bond_enslave(bond_dev, slave_dev, NULL);
                 break;
         case BOND_RELEASE_OLD:
         case SIOCBONDRELEASE:
@@@ -3811,7 -3825,7 +3825,7 @@@ static int bond_xmit_roundrobin(struct 
                 else
                         bond_xmit_slave_id(bond, skb, 0);
         } else {
- -              int slave_cnt = ACCESS_ONCE(bond->slave_cnt);
+ +              int slave_cnt = READ_ONCE(bond->slave_cnt);
   
                 if (likely(slave_cnt)) {
                         slave_id = bond_rr_gen_slave_id(bond);
@@@ -3973,7 -3987,7 +3987,7 @@@ static int bond_3ad_xor_xmit(struct sk_
         unsigned int count;
   
         slaves = rcu_dereference(bond->slave_arr);
- -      count = slaves ? ACCESS_ONCE(slaves->count) : 0;
+ +      count = slaves ? READ_ONCE(slaves->count) : 0;
         if (likely(count)) {
                 slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
                 bond_dev_queue_xmit(bond, skb, slave->dev);
diff --combined drivers/net/ethernet/chelsio/cxgb4/sge.c

index fe5cedd96a248da0f24dfd766b1398bbb1d5085a,486b01fe23bd6de0f44b2cbd7a81f71ada4c0ef8..922f2f93778930226c364dc743e0cc8429c54b0e
--- 1/drivers/net/ethernet/chelsio/cxgb4/sge.c
--- 2/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@@ -405,7 -405,7 +405,7 @@@ void free_tx_desc(struct adapter *adap
    */
   static inline int reclaimable(const struct sge_txq *q)
   {
- -      int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
+ +      int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
         hw_cidx -= q->cidx;
         return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
   }
@@@ -1375,7 -1375,7 +1375,7 @@@ out_free:       dev_kfree_skb_any(skb)
    */
   static inline void reclaim_completed_tx_imm(struct sge_txq *q)
   {
- -      int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
+ +      int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
         int reclaim = hw_cidx - q->cidx;
   
         if (reclaim < 0)
@@@ -1537,13 -1537,7 +1537,13 @@@ int t4_mgmt_tx(struct adapter *adap, st
    */
   static inline int is_ofld_imm(const struct sk_buff *skb)
   {
- -      return skb->len <= MAX_IMM_TX_PKT_LEN;
+ +      struct work_request_hdr *req = (struct work_request_hdr *)skb->data;
+ +      unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));
+ +
+ +      if (opcode == FW_CRYPTO_LOOKASIDE_WR)
+ +              return skb->len <= SGE_MAX_WR_LEN;
+ +      else
+ +              return skb->len <= MAX_IMM_TX_PKT_LEN;
   }
   
   /**
@@@ -2589,11 -2583,11 +2589,11 @@@ irq_handler_t t4_intr_handler(struct ad
         return t4_intr_intx;
   }
   
- static void sge_rx_timer_cb(unsigned long data)
+ static void sge_rx_timer_cb(struct timer_list *t)
   {
         unsigned long m;
         unsigned int i;
-       struct adapter *adap = (struct adapter *)data;
+       struct adapter *adap = from_timer(adap, t, sge.rx_timer);
         struct sge *s = &adap->sge;
   
         for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@@ -2626,11 -2620,11 +2626,11 @@@ done
         mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
   }
   
- static void sge_tx_timer_cb(unsigned long data)
+ static void sge_tx_timer_cb(struct timer_list *t)
   {
         unsigned long m;
         unsigned int i, budget;
-       struct adapter *adap = (struct adapter *)data;
+       struct adapter *adap = from_timer(adap, t, sge.tx_timer);
         struct sge *s = &adap->sge;
   
         for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@@ -3464,8 -3458,8 +3464,8 @@@ int t4_sge_init(struct adapter *adap
         /* Set up timers used for recuring callbacks to process RX and TX
          * administrative tasks.
          */
-       setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
-       setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
+       timer_setup(&s->rx_timer, sge_rx_timer_cb, 0);
+       timer_setup(&s->tx_timer, sge_tx_timer_cb, 0);
   
         spin_lock_init(&s->intrq_lock);
   
diff --combined drivers/net/ethernet/intel/i40e/i40e_debugfs.c

index 2cb9539c931e51f7a18696db9a307e3709c6d2f4,2b8bbc84e34f2b055ae6d999aa13603748a308b5..4c3b4243cf652a2102ac0a7e6a21b2ac0e0386e3
--- 1/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
--- 2/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@@ -264,7 -264,7 +264,7 @@@ static void i40e_dbg_dump_vsi_seid(stru
                  vsi->rx_buf_failed, vsi->rx_page_failed);
         rcu_read_lock();
         for (i = 0; i < vsi->num_queue_pairs; i++) {
- -              struct i40e_ring *rx_ring = ACCESS_ONCE(vsi->rx_rings[i]);
+ +              struct i40e_ring *rx_ring = READ_ONCE(vsi->rx_rings[i]);
   
                 if (!rx_ring)
                         continue;
@@@ -278,8 -278,8 +278,8 @@@
                          rx_ring->netdev,
                          rx_ring->rx_bi);
                 dev_info(&pf->pdev->dev,
-                        "    rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
-                        i, rx_ring->state,
+                        "    rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+                        i, *rx_ring->state,
                          rx_ring->queue_index,
                          rx_ring->reg_idx);
                 dev_info(&pf->pdev->dev,
@@@ -320,7 -320,7 +320,7 @@@
                          ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
         }
         for (i = 0; i < vsi->num_queue_pairs; i++) {
- -              struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
+ +              struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
   
                 if (!tx_ring)
                         continue;
@@@ -334,8 -334,8 +334,8 @@@
                          tx_ring->netdev,
                          tx_ring->tx_bi);
                 dev_info(&pf->pdev->dev,
-                        "    tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
-                        i, tx_ring->state,
+                        "    tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+                        i, *tx_ring->state,
                          tx_ring->queue_index,
                          tx_ring->reg_idx);
                 dev_info(&pf->pdev->dev,
@@@ -798,8 -798,7 +798,7 @@@ static ssize_t i40e_dbg_command_write(s
                  */
                 if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
                         pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
-                       i40e_do_reset_safe(pf,
-                                          BIT_ULL(__I40E_PF_RESET_REQUESTED));
+                       i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
                 }
   
                 vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
diff --combined drivers/net/ethernet/intel/i40e/i40e_ethtool.c

index e9e04a485e0a765e392afef2f943f8bd84bb3400,dc9b8dcf4a1ee1bca390d101a192a3b6467dfb8d..5f6cf7212d4fc230b11fc6fe79fb6427f2b8c2b0
--- 1/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
--- 2/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@@ -227,6 -227,8 +227,8 @@@ static const struct i40e_priv_flags i40
         I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
         I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
         I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
+       I40E_PRIV_FLAG("disable-source-pruning",
+                      I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
   };
   
   #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
@@@ -251,428 -253,557 +253,557 @@@ static void i40e_partition_setting_comp
   
   /**
    * i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes
-  * @phy_types: PHY types to convert
-  * @supported: pointer to the ethtool supported variable to fill in
-  * @advertising: pointer to the ethtool advertising variable to fill in
+  * @pf: PF struct with phy_types
+  * @ks: ethtool link ksettings struct to fill out
    *
    **/
- static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
-                                    u32 *advertising)
+ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
+                                    struct ethtool_link_ksettings *ks)
   {
         struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info;
         u64 phy_types = pf->hw.phy.phy_types;
   
-       *supported = 0x0;
-       *advertising = 0x0;
+       ethtool_link_ksettings_zero_link_mode(ks, supported);
+       ethtool_link_ksettings_zero_link_mode(ks, advertising);
   
         if (phy_types & I40E_CAP_PHY_TYPE_SGMII) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_1000baseT_Full;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       *advertising |= ADVERTISED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseT_Full);
                 if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
-                       *supported |= SUPPORTED_100baseT_Full;
-                       *advertising |= ADVERTISED_100baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                            100baseT_Full);
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            100baseT_Full);
                 }
         }
         if (phy_types & I40E_CAP_PHY_TYPE_XAUI ||
             phy_types & I40E_CAP_PHY_TYPE_XFI ||
             phy_types & I40E_CAP_PHY_TYPE_SFI ||
             phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC)
-               *supported |= SUPPORTED_10000baseT_Full;
-       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_10000baseT_Full;
-               *advertising |= ADVERTISED_Autoneg;
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_T) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       *advertising |= ADVERTISED_10000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
         }
         if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
             phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
             phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
-               *supported |= SUPPORTED_40000baseCR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseCR4_Full);
         if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
             phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_40000baseCR4_Full;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseCR4_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB)
-                       *advertising |= ADVERTISED_40000baseCR4_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            40000baseCR4_Full);
         }
         if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_100baseT_Full;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    100baseT_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
-                       *advertising |= ADVERTISED_100baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            100baseT_Full);
         }
-       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
-           phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
-           phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
-           phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_1000baseT_Full;
-               *advertising |= ADVERTISED_Autoneg;
+       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       *advertising |= ADVERTISED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseT_Full);
         }
         if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
-               *supported |= SUPPORTED_40000baseSR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseSR4_Full);
         if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
-               *supported |= SUPPORTED_40000baseLR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseLR4_Full);
         if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
-               *supported |= SUPPORTED_40000baseKR4_Full |
-                             SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_40000baseKR4_Full |
-                               ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseLR4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    40000baseLR4_Full);
         }
         if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
-               *supported |= SUPPORTED_20000baseKR2_Full |
-                             SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    20000baseKR2_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_20GB)
-                       *advertising |= ADVERTISED_20000baseKR2_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            20000baseKR2_Full);
         }
-       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
-               if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-                       *supported |= SUPPORTED_10000baseKR_Full |
-                                     SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseKX4_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-                               *advertising |= ADVERTISED_10000baseKR_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseKX4_Full);
         }
-       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
-               *supported |= SUPPORTED_10000baseKX4_Full |
-                             SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR &&
+           !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseKR_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       *advertising |= ADVERTISED_10000baseKX4_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseKR_Full);
         }
-       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
-               if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-                       *supported |= SUPPORTED_1000baseKX_Full |
-                                     SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX &&
+           !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseKX_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-                               *advertising |= ADVERTISED_1000baseKX_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseKX_Full);
         }
-       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
-           phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
-           phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+       /* need to add 25G PHY types */
+       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseKR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            25000baseKR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseCR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            25000baseCR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
             phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) {
-               *supported |= SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseSR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            25000baseSR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseCR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            25000baseCR_Full);
+       }
+       /* need to add new 10G PHY types */
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseCR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseCR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseSR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseSR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseLR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseLR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseX_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseX_Full);
+       }
+       /* Autoneg PHY types */
+       if (phy_types & I40E_CAP_PHY_TYPE_SGMII ||
+           phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4 ||
+           phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
+           phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4 ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+           phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX ||
+           phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    Autoneg);
         }
   }
   
   /**
    * i40e_get_settings_link_up - Get the Link settings for when link is up
    * @hw: hw structure
-  * @ecmd: ethtool command to fill in
+  * @ks: ethtool ksettings to fill in
    * @netdev: network interface device structure
-  *
+  * @pf: pointer to physical function struct
    **/
   static void i40e_get_settings_link_up(struct i40e_hw *hw,
-                                     struct ethtool_link_ksettings *cmd,
+                                     struct ethtool_link_ksettings *ks,
                                       struct net_device *netdev,
                                       struct i40e_pf *pf)
   {
         struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+       struct ethtool_link_ksettings cap_ksettings;
         u32 link_speed = hw_link_info->link_speed;
-       u32 e_advertising = 0x0;
-       u32 e_supported = 0x0;
-       u32 supported, advertising;
- 
-       ethtool_convert_link_mode_to_legacy_u32(&supported,
-                                               cmd->link_modes.supported);
-       ethtool_convert_link_mode_to_legacy_u32(&advertising,
-                                               cmd->link_modes.advertising);
   
         /* Initialize supported and advertised settings based on phy settings */
         switch (hw_link_info->phy_type) {
         case I40E_PHY_TYPE_40GBASE_CR4:
         case I40E_PHY_TYPE_40GBASE_CR4_CU:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_40000baseCR4_Full;
-               advertising = ADVERTISED_Autoneg |
-                             ADVERTISED_40000baseCR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseCR4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    40000baseCR4_Full);
                 break;
         case I40E_PHY_TYPE_XLAUI:
         case I40E_PHY_TYPE_XLPPI:
         case I40E_PHY_TYPE_40GBASE_AOC:
-               supported = SUPPORTED_40000baseCR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseCR4_Full);
                 break;
         case I40E_PHY_TYPE_40GBASE_SR4:
-               supported = SUPPORTED_40000baseSR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseSR4_Full);
                 break;
         case I40E_PHY_TYPE_40GBASE_LR4:
-               supported = SUPPORTED_40000baseLR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseLR4_Full);
                 break;
+       case I40E_PHY_TYPE_25GBASE_SR:
+       case I40E_PHY_TYPE_25GBASE_LR:
         case I40E_PHY_TYPE_10GBASE_SR:
         case I40E_PHY_TYPE_10GBASE_LR:
         case I40E_PHY_TYPE_1000BASE_SX:
         case I40E_PHY_TYPE_1000BASE_LX:
-               supported = SUPPORTED_10000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    25000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseLR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseLR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseX_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    1000baseX_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
                 if (hw_link_info->module_type[2] &
                     I40E_MODULE_TYPE_1000BASE_SX ||
                     hw_link_info->module_type[2] &
                     I40E_MODULE_TYPE_1000BASE_LX) {
-                       supported |= SUPPORTED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                            1000baseT_Full);
                         if (hw_link_info->requested_speeds &
                             I40E_LINK_SPEED_1GB)
-                               advertising |= ADVERTISED_1000baseT_Full;
+                               ethtool_link_ksettings_add_link_mode(
+                                    ks, advertising, 1000baseT_Full);
                 }
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       advertising |= ADVERTISED_10000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
                 break;
         case I40E_PHY_TYPE_10GBASE_T:
         case I40E_PHY_TYPE_1000BASE_T:
         case I40E_PHY_TYPE_100BASE_TX:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_10000baseT_Full |
-                           SUPPORTED_1000baseT_Full |
-                           SUPPORTED_100baseT_Full;
-               advertising = ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    100baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       advertising |= ADVERTISED_10000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       advertising |= ADVERTISED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseT_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
-                       advertising |= ADVERTISED_100baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            100baseT_Full);
                 break;
         case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_1000baseT_Full;
-               advertising = ADVERTISED_Autoneg |
-                             ADVERTISED_1000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    1000baseT_Full);
                 break;
         case I40E_PHY_TYPE_10GBASE_CR1_CU:
         case I40E_PHY_TYPE_10GBASE_CR1:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_10000baseT_Full;
-               advertising = ADVERTISED_Autoneg |
-                             ADVERTISED_10000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseT_Full);
                 break;
         case I40E_PHY_TYPE_XAUI:
         case I40E_PHY_TYPE_XFI:
         case I40E_PHY_TYPE_SFI:
         case I40E_PHY_TYPE_10GBASE_SFPP_CU:
         case I40E_PHY_TYPE_10GBASE_AOC:
-               supported = SUPPORTED_10000baseT_Full;
-               advertising = SUPPORTED_10000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
                 break;
         case I40E_PHY_TYPE_SGMII:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_1000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
                 if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       advertising |= ADVERTISED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseT_Full);
                 if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
-                       supported |= SUPPORTED_100baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                            100baseT_Full);
                         if (hw_link_info->requested_speeds &
                             I40E_LINK_SPEED_100MB)
-                               advertising |= ADVERTISED_100baseT_Full;
+                               ethtool_link_ksettings_add_link_mode(
+                                     ks, advertising, 100baseT_Full);
                 }
                 break;
         case I40E_PHY_TYPE_40GBASE_KR4:
+       case I40E_PHY_TYPE_25GBASE_KR:
         case I40E_PHY_TYPE_20GBASE_KR2:
         case I40E_PHY_TYPE_10GBASE_KR:
         case I40E_PHY_TYPE_10GBASE_KX4:
         case I40E_PHY_TYPE_1000BASE_KX:
-               supported |= SUPPORTED_40000baseKR4_Full |
-                            SUPPORTED_20000baseKR2_Full |
-                            SUPPORTED_10000baseKR_Full |
-                            SUPPORTED_10000baseKX4_Full |
-                            SUPPORTED_1000baseKX_Full |
-                            SUPPORTED_Autoneg;
-               advertising |= ADVERTISED_40000baseKR4_Full |
-                              ADVERTISED_20000baseKR2_Full |
-                              ADVERTISED_10000baseKR_Full |
-                              ADVERTISED_10000baseKX4_Full |
-                              ADVERTISED_1000baseKX_Full |
-                              ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseKR4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseKR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    20000baseKR2_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseKR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseKX4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseKX_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    40000baseKR4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    25000baseKR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    20000baseKR2_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseKR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseKX4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    1000baseKX_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
                 break;
-       case I40E_PHY_TYPE_25GBASE_KR:
         case I40E_PHY_TYPE_25GBASE_CR:
-       case I40E_PHY_TYPE_25GBASE_SR:
-       case I40E_PHY_TYPE_25GBASE_LR:
-               supported = SUPPORTED_Autoneg;
-               advertising = ADVERTISED_Autoneg;
-               /* TODO: add speeds when ethtool is ready to support*/
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseCR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    25000baseCR_Full);
+               break;
+       case I40E_PHY_TYPE_25GBASE_AOC:
+       case I40E_PHY_TYPE_25GBASE_ACC:
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseCR_Full);
+ 
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    25000baseCR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseCR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseCR_Full);
                 break;
         default:
                 /* if we got here and link is up something bad is afoot */
-               netdev_info(netdev, "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+               netdev_info(netdev,
+                           "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
                             hw_link_info->phy_type);
         }
   
         /* Now that we've worked out everything that could be supported by the
-        * current PHY type, get what is supported by the NVM and them to
-        * get what is truly supported
+        * current PHY type, get what is supported by the NVM and intersect
+        * them to get what is truly supported
          */
-       i40e_phy_type_to_ethtool(pf, &e_supported,
-                                &e_advertising);
- 
-       supported = supported & e_supported;
-       advertising = advertising & e_advertising;
+       memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings));
+       i40e_phy_type_to_ethtool(pf, &cap_ksettings);
+       ethtool_intersect_link_masks(ks, &cap_ksettings);
   
         /* Set speed and duplex */
         switch (link_speed) {
         case I40E_LINK_SPEED_40GB:
-               cmd->base.speed = SPEED_40000;
+               ks->base.speed = SPEED_40000;
                 break;
         case I40E_LINK_SPEED_25GB:
- #ifdef SPEED_25000
-               cmd->base.speed = SPEED_25000;
- #else
-               netdev_info(netdev,
-                           "Speed is 25G, display not supported by this version of ethtool.\n");
- #endif
+               ks->base.speed = SPEED_25000;
                 break;
         case I40E_LINK_SPEED_20GB:
-               cmd->base.speed = SPEED_20000;
+               ks->base.speed = SPEED_20000;
                 break;
         case I40E_LINK_SPEED_10GB:
-               cmd->base.speed = SPEED_10000;
+               ks->base.speed = SPEED_10000;
                 break;
         case I40E_LINK_SPEED_1GB:
-               cmd->base.speed = SPEED_1000;
+               ks->base.speed = SPEED_1000;
                 break;
         case I40E_LINK_SPEED_100MB:
-               cmd->base.speed = SPEED_100;
+               ks->base.speed = SPEED_100;
                 break;
         default:
                 break;
         }
-       cmd->base.duplex = DUPLEX_FULL;
- 
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-                                               supported);
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-                                               advertising);
+       ks->base.duplex = DUPLEX_FULL;
   }
   
   /**
    * i40e_get_settings_link_down - Get the Link settings for when link is down
    * @hw: hw structure
-  * @ecmd: ethtool command to fill in
+  * @ks: ethtool ksettings to fill in
+  * @pf: pointer to physical function struct
    *
    * Reports link settings that can be determined when link is down
    **/
   static void i40e_get_settings_link_down(struct i40e_hw *hw,
-                                       struct ethtool_link_ksettings *cmd,
+                                       struct ethtool_link_ksettings *ks,
                                         struct i40e_pf *pf)
   {
-       u32 supported, advertising;
- 
         /* link is down and the driver needs to fall back on
          * supported phy types to figure out what info to display
          */
-       i40e_phy_type_to_ethtool(pf, &supported, &advertising);
- 
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-                                               supported);
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-                                               advertising);
+       i40e_phy_type_to_ethtool(pf, ks);
   
         /* With no link speed and duplex are unknown */
-       cmd->base.speed = SPEED_UNKNOWN;
-       cmd->base.duplex = DUPLEX_UNKNOWN;
+       ks->base.speed = SPEED_UNKNOWN;
+       ks->base.duplex = DUPLEX_UNKNOWN;
   }
   
   /**
-  * i40e_get_settings - Get Link Speed and Duplex settings
+  * i40e_get_link_ksettings - Get Link Speed and Duplex settings
    * @netdev: network interface device structure
-  * @ecmd: ethtool command
+  * @ks: ethtool ksettings
    *
    * Reports speed/duplex settings based on media_type
    **/
   static int i40e_get_link_ksettings(struct net_device *netdev,
-                                  struct ethtool_link_ksettings *cmd)
+                                  struct ethtool_link_ksettings *ks)
   {
         struct i40e_netdev_priv *np = netdev_priv(netdev);
         struct i40e_pf *pf = np->vsi->back;
         struct i40e_hw *hw = &pf->hw;
         struct i40e_link_status *hw_link_info = &hw->phy.link_info;
         bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
-       u32 advertising;
+ 
+       ethtool_link_ksettings_zero_link_mode(ks, supported);
+       ethtool_link_ksettings_zero_link_mode(ks, advertising);
   
         if (link_up)
-               i40e_get_settings_link_up(hw, cmd, netdev, pf);
+               i40e_get_settings_link_up(hw, ks, netdev, pf);
         else
-               i40e_get_settings_link_down(hw, cmd, pf);
+               i40e_get_settings_link_down(hw, ks, pf);
   
         /* Now set the settings that don't rely on link being up/down */
         /* Set autoneg settings */
-       cmd->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
-                         AUTONEG_ENABLE : AUTONEG_DISABLE);
+       ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+                           AUTONEG_ENABLE : AUTONEG_DISABLE);
   
+       /* Set media type settings */
         switch (hw->phy.media_type) {
         case I40E_MEDIA_TYPE_BACKPLANE:
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    Autoneg);
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    Backplane);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    Autoneg);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
                                                      Backplane);
-               cmd->base.port = PORT_NONE;
+               ks->base.port = PORT_NONE;
                 break;
         case I40E_MEDIA_TYPE_BASET:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
-               cmd->base.port = PORT_TP;
+               ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+               ks->base.port = PORT_TP;
                 break;
         case I40E_MEDIA_TYPE_DA:
         case I40E_MEDIA_TYPE_CX4:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
-               cmd->base.port = PORT_DA;
+               ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+               ks->base.port = PORT_DA;
                 break;
         case I40E_MEDIA_TYPE_FIBER:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-               cmd->base.port = PORT_FIBRE;
+               ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+               ks->base.port = PORT_FIBRE;
                 break;
         case I40E_MEDIA_TYPE_UNKNOWN:
         default:
-               cmd->base.port = PORT_OTHER;
+               ks->base.port = PORT_OTHER;
                 break;
         }
   
         /* Set flow control settings */
-       ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+       ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
   
         switch (hw->fc.requested_mode) {
         case I40E_FC_FULL:
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    Pause);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
                 break;
         case I40E_FC_TX_PAUSE:
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
                                                      Asym_Pause);
                 break;
         case I40E_FC_RX_PAUSE:
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    Pause);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
                                                      Asym_Pause);
                 break;
         default:
-               ethtool_convert_link_mode_to_legacy_u32(
-                       &advertising, cmd->link_modes.advertising);
- 
-               advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
- 
-               ethtool_convert_legacy_u32_to_link_mode(
-                       cmd->link_modes.advertising, advertising);
+               ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+               ethtool_link_ksettings_del_link_mode(ks, advertising,
+                                                    Asym_Pause);
                 break;
         }
   
@@@ -680,30 -811,28 +811,28 @@@
   }
   
   /**
-  * i40e_set_settings - Set Speed and Duplex
+  * i40e_set_link_ksettings - Set Speed and Duplex
    * @netdev: network interface device structure
-  * @ecmd: ethtool command
+  * @ks: ethtool ksettings
    *
    * Set speed/duplex per media_types advertised/forced
    **/
   static int i40e_set_link_ksettings(struct net_device *netdev,
-                                  const struct ethtool_link_ksettings *cmd)
+                                  const struct ethtool_link_ksettings *ks)
   {
         struct i40e_netdev_priv *np = netdev_priv(netdev);
         struct i40e_aq_get_phy_abilities_resp abilities;
+       struct ethtool_link_ksettings safe_ks;
+       struct ethtool_link_ksettings copy_ks;
         struct i40e_aq_set_phy_config config;
         struct i40e_pf *pf = np->vsi->back;
         struct i40e_vsi *vsi = np->vsi;
         struct i40e_hw *hw = &pf->hw;
-       struct ethtool_link_ksettings safe_cmd;
-       struct ethtool_link_ksettings copy_cmd;
+       bool autoneg_changed = false;
         i40e_status status = 0;
-       bool change = false;
         int timeout = 50;
         int err = 0;
-       u32 autoneg;
-       u32 advertise;
-       u32 tmp;
+       u8 autoneg;
   
         /* Changing port settings is not supported if this isn't the
          * port's controlling PF
@@@ -712,17 -841,14 +841,14 @@@
                 i40e_partition_setting_complaint(pf);
                 return -EOPNOTSUPP;
         }
- 
         if (vsi != pf->vsi[pf->lan_vsi])
                 return -EOPNOTSUPP;
- 
         if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET &&
             hw->phy.media_type != I40E_MEDIA_TYPE_FIBER &&
             hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE &&
             hw->phy.media_type != I40E_MEDIA_TYPE_DA &&
             hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
                 return -EOPNOTSUPP;
- 
         if (hw->device_id == I40E_DEV_ID_KX_B ||
             hw->device_id == I40E_DEV_ID_KX_C ||
             hw->device_id == I40E_DEV_ID_20G_KR2 ||
@@@ -731,31 -857,37 +857,37 @@@
                 return -EOPNOTSUPP;
         }
   
-       /* copy the cmd to copy_cmd to avoid modifying the origin */
-       memcpy(&copy_cmd, cmd, sizeof(struct ethtool_link_ksettings));
+       /* copy the ksettings to copy_ks to avoid modifying the origin */
+       memcpy(&copy_ks, ks, sizeof(struct ethtool_link_ksettings));
   
-       /* get our own copy of the bits to check against */
-       memset(&safe_cmd, 0, sizeof(struct ethtool_link_ksettings));
-       i40e_get_link_ksettings(netdev, &safe_cmd);
+       /* save autoneg out of ksettings */
+       autoneg = copy_ks.base.autoneg;
   
-       /* save autoneg and speed out of cmd */
-       autoneg = cmd->base.autoneg;
-       ethtool_convert_link_mode_to_legacy_u32(&advertise,
-                                               cmd->link_modes.advertising);
+       memset(&safe_ks, 0, sizeof(safe_ks));
+       /* Get link modes supported by hardware and check against modes
+        * requested by the user.  Return an error if unsupported mode was set.
+        */
+       i40e_phy_type_to_ethtool(pf, &safe_ks);
+       if (!bitmap_subset(copy_ks.link_modes.advertising,
+                          safe_ks.link_modes.supported,
+                          __ETHTOOL_LINK_MODE_MASK_NBITS))
+               return -EINVAL;
   
-       /* set autoneg and speed back to what they currently are */
-       copy_cmd.base.autoneg = safe_cmd.base.autoneg;
-       ethtool_convert_link_mode_to_legacy_u32(
-               &tmp, safe_cmd.link_modes.advertising);
-       ethtool_convert_legacy_u32_to_link_mode(
-               copy_cmd.link_modes.advertising, tmp);
+       /* get our own copy of the bits to check against */
+       memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+       safe_ks.base.cmd = copy_ks.base.cmd;
+       safe_ks.base.link_mode_masks_nwords =
+               copy_ks.base.link_mode_masks_nwords;
+       i40e_get_link_ksettings(netdev, &safe_ks);
   
-       copy_cmd.base.cmd = safe_cmd.base.cmd;
+       /* set autoneg back to what it currently is */
+       copy_ks.base.autoneg = safe_ks.base.autoneg;
   
-       /* If copy_cmd and safe_cmd are not the same now, then they are
-        * trying to set something that we do not support
+       /* If copy_ks.base and safe_ks.base are not the same now, then they are
+        * trying to set something that we do not support.
          */
-       if (memcmp(&copy_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings)))
+       if (memcmp(&copy_ks.base, &safe_ks.base,
+                  sizeof(struct ethtool_link_settings)))
                 return -EOPNOTSUPP;
   
         while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
@@@ -784,8 -916,9 +916,9 @@@
                 /* If autoneg was not already enabled */
                 if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
                         /* If autoneg is not supported, return error */
-                       if (!ethtool_link_ksettings_test_link_mode(
-                                   &safe_cmd, supported, Autoneg)) {
+                       if (!ethtool_link_ksettings_test_link_mode(&safe_ks,
+                                                                  supported,
+                                                                  Autoneg)) {
                                 netdev_info(netdev, "Autoneg not supported on this phy\n");
                                 err = -EINVAL;
                                 goto done;
@@@ -793,7 -926,7 +926,7 @@@
                         /* Autoneg is allowed to change */
                         config.abilities = abilities.abilities |
                                            I40E_AQ_PHY_ENABLE_AN;
-                       change = true;
+                       autoneg_changed = true;
                 }
         } else {
                 /* If autoneg is currently enabled */
@@@ -801,8 -934,9 +934,9 @@@
                         /* If autoneg is supported 10GBASE_T is the only PHY
                          * that can disable it, so otherwise return error
                          */
-                       if (ethtool_link_ksettings_test_link_mode(
-                                   &safe_cmd, supported, Autoneg) &&
+                       if (ethtool_link_ksettings_test_link_mode(&safe_ks,
+                                                                 supported,
+                                                                 Autoneg) &&
                             hw->phy.link_info.phy_type !=
                             I40E_PHY_TYPE_10GBASE_T) {
                                 netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
@@@ -812,32 -946,49 +946,49 @@@
                         /* Autoneg is allowed to change */
                         config.abilities = abilities.abilities &
                                            ~I40E_AQ_PHY_ENABLE_AN;
-                       change = true;
+                       autoneg_changed = true;
                 }
         }
   
-       ethtool_convert_link_mode_to_legacy_u32(&tmp,
-                                               safe_cmd.link_modes.supported);
-       if (advertise & ~tmp) {
-               err = -EINVAL;
-               goto done;
-       }
- 
-       if (advertise & ADVERTISED_100baseT_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 100baseT_Full))
                 config.link_speed |= I40E_LINK_SPEED_100MB;
-       if (advertise & ADVERTISED_1000baseT_Full ||
-           advertise & ADVERTISED_1000baseKX_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 1000baseT_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 1000baseX_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 1000baseKX_Full))
                 config.link_speed |= I40E_LINK_SPEED_1GB;
-       if (advertise & ADVERTISED_10000baseT_Full ||
-           advertise & ADVERTISED_10000baseKX4_Full ||
-           advertise & ADVERTISED_10000baseKR_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseT_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseKX4_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseKR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseCR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseSR_Full))
                 config.link_speed |= I40E_LINK_SPEED_10GB;
-       if (advertise & ADVERTISED_20000baseKR2_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 20000baseKR2_Full))
                 config.link_speed |= I40E_LINK_SPEED_20GB;
-       if (advertise & ADVERTISED_40000baseKR4_Full ||
-           advertise & ADVERTISED_40000baseCR4_Full ||
-           advertise & ADVERTISED_40000baseSR4_Full ||
-           advertise & ADVERTISED_40000baseLR4_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 25000baseCR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 25000baseKR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 25000baseSR_Full))
+               config.link_speed |= I40E_LINK_SPEED_25GB;
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 40000baseKR4_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 40000baseCR4_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 40000baseSR4_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 40000baseLR4_Full))
                 config.link_speed |= I40E_LINK_SPEED_40GB;
   
         /* If speed didn't get set, set it to what it currently is.
@@@ -846,8 -997,7 +997,7 @@@
          */
         if (!config.link_speed)
                 config.link_speed = abilities.link_speed;
- 
-       if (change || (abilities.link_speed != config.link_speed)) {
+       if (autoneg_changed || abilities.link_speed != config.link_speed) {
                 /* copy over the rest of the abilities */
                 config.phy_type = abilities.phy_type;
                 config.phy_type_ext = abilities.phy_type_ext;
@@@ -874,7 -1024,8 +1024,8 @@@
                 /* make the aq call */
                 status = i40e_aq_set_phy_config(hw, &config, NULL);
                 if (status) {
-                       netdev_info(netdev, "Set phy config failed, err %s aq_err %s\n",
+                       netdev_info(netdev,
+                                   "Set phy config failed, err %s aq_err %s\n",
                                     i40e_stat_str(hw, status),
                                     i40e_aq_str(hw, hw->aq.asq_last_status));
                         err = -EAGAIN;
@@@ -883,7 -1034,8 +1034,8 @@@
   
                 status = i40e_update_link_info(hw);
                 if (status)
-                       netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n",
+                       netdev_dbg(netdev,
+                                  "Updating link info failed with err %s aq_err %s\n",
                                    i40e_stat_str(hw, status),
                                    i40e_aq_str(hw, hw->aq.asq_last_status));
   
@@@ -1570,7 -1722,7 +1722,7 @@@ static void i40e_get_ethtool_stats(stru
         }
         rcu_read_lock();
         for (j = 0; j < vsi->num_queue_pairs; j++) {
- -              tx_ring = ACCESS_ONCE(vsi->tx_rings[j]);
+ +              tx_ring = READ_ONCE(vsi->tx_rings[j]);
   
                 if (!tx_ring)
                         continue;
@@@ -2008,7 -2160,9 +2160,9 @@@ static int i40e_set_phys_id(struct net_
                 if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
                         pf->led_status = i40e_led_get(hw);
                 } else {
-                       i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL);
+                       if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+                               i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL,
+                                                     NULL);
                         ret = i40e_led_get_phy(hw, &temp_status,
                                                &pf->phy_led_val);
                         pf->led_status = temp_status;
@@@ -2033,7 -2187,8 +2187,8 @@@
                         ret = i40e_led_set_phy(hw, false, pf->led_status,
                                                (pf->phy_led_val |
                                                I40E_PHY_LED_MODE_ORIG));
-                       i40e_aq_set_phy_debug(hw, 0, NULL);
+                       if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+                               i40e_aq_set_phy_debug(hw, 0, NULL);
                 }
                 break;
         default:
@@@ -2071,14 -2226,13 +2226,13 @@@ static int __i40e_get_coalesce(struct n
         ec->tx_max_coalesced_frames_irq = vsi->work_limit;
         ec->rx_max_coalesced_frames_irq = vsi->work_limit;
   
-       /* rx and tx usecs has per queue value. If user doesn't specify the queue,
-        * return queue 0's value to represent.
+       /* rx and tx usecs has per queue value. If user doesn't specify the
+        * queue, return queue 0's value to represent.
          */
-       if (queue < 0) {
+       if (queue < 0)
                 queue = 0;
-       } else if (queue >= vsi->num_queue_pairs) {
+       else if (queue >= vsi->num_queue_pairs)
                 return -EINVAL;
-       }
   
         rx_ring = vsi->rx_rings[queue];
         tx_ring = vsi->tx_rings[queue];
@@@ -2092,7 -2246,6 +2246,6 @@@
         ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
         ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
   
- 
         /* we use the _usecs_high to store/set the interrupt rate limit
          * that the hardware supports, that almost but not quite
          * fits the original intent of the ethtool variable,
@@@ -2142,7 -2295,6 +2295,6 @@@ static int i40e_get_per_queue_coalesce(
    *
    * Change the ITR settings for a specific queue.
    **/
- 
   static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
                                    struct ethtool_coalesce *ec,
                                    int queue)
@@@ -2264,8 -2416,8 +2416,8 @@@ static int __i40e_set_coalesce(struct n
                            vsi->int_rate_limit);
         }
   
-       /* rx and tx usecs has per queue value. If user doesn't specify the queue,
-        * apply to all queues.
+       /* rx and tx usecs has per queue value. If user doesn't specify the
+        * queue, apply to all queues.
          */
         if (queue < 0) {
                 for (i = 0; i < vsi->num_queue_pairs; i++)
@@@ -2647,7 -2799,7 +2799,7 @@@ static int i40e_get_rxnfc(struct net_de
   
         switch (cmd->cmd) {
         case ETHTOOL_GRXRINGS:
-               cmd->data = vsi->num_queue_pairs;
+               cmd->data = vsi->rss_size;
                 ret = 0;
                 break;
         case ETHTOOL_GRXFH:
@@@ -3892,6 -4044,12 +4044,12 @@@ static int i40e_set_channels(struct net
         if (vsi->type != I40E_VSI_MAIN)
                 return -EINVAL;
   
+       /* We do not support setting channels via ethtool when TCs are
+        * configured through mqprio
+        */
+       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+               return -EINVAL;
+ 
         /* verify they are not requesting separate vectors */
         if (!count || ch->rx_count || ch->tx_count)
                 return -EINVAL;
@@@ -3959,6 -4117,16 +4117,16 @@@ static u32 i40e_get_rxfh_indir_size(str
         return I40E_HLUT_ARRAY_SIZE;
   }
   
+ /**
+  * i40e_get_rxfh - get the rx flow hash indirection table
+  * @netdev: network interface device structure
+  * @indir: indirection table
+  * @key: hash key
+  * @hfunc: hash function
+  *
+  * Reads the indirection table directly from the hardware. Returns 0 on
+  * success.
+  **/
   static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
                          u8 *hfunc)
   {
@@@ -4090,7 -4258,7 +4258,7 @@@ static int i40e_set_priv_flags(struct n
         struct i40e_netdev_priv *np = netdev_priv(dev);
         struct i40e_vsi *vsi = np->vsi;
         struct i40e_pf *pf = vsi->back;
-       u64 orig_flags, new_flags, changed_flags;
+       u32 orig_flags, new_flags, changed_flags;
         u32 i, j;
   
         orig_flags = READ_ONCE(pf->flags);
@@@ -4142,12 -4310,12 +4310,12 @@@ flags_complete
                 return -EOPNOTSUPP;
   
         /* Compare and exchange the new flags into place. If we failed, that
-        * is if cmpxchg64 returns anything but the old value, this means that
+        * is if cmpxchg returns anything but the old value, this means that
          * something else has modified the flags variable since we copied it
          * originally. We'll just punt with an error and log something in the
          * message buffer.
          */
-       if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
+       if (cmpxchg(&pf->flags, orig_flags, new_flags) != orig_flags) {
                 dev_warn(&pf->pdev->dev,
                          "Unable to update pf->flags as it was modified by another thread...\n");
                 return -EAGAIN;
@@@ -4175,7 -4343,7 +4343,7 @@@
                         sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
                 valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
                 ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags,
-                                               NULL);
+                                               0, NULL);
                 if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
                         dev_info(&pf->pdev->dev,
                                  "couldn't set switch config bits, err %s aq_err %s\n",
@@@ -4189,13 -4357,166 +4357,166 @@@
         /* Issue reset to cause things to take effect, as additional bits
          * are added we will need to create a mask of bits requiring reset
          */
-       if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) ||
-           ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev)))
+       if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
+                            I40E_FLAG_LEGACY_RX |
+                            I40E_FLAG_SOURCE_PRUNING_DISABLED))
                 i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
   
         return 0;
   }
   
+ /**
+  * i40e_get_module_info - get (Q)SFP+ module type info
+  * @netdev: network interface device structure
+  * @modinfo: module EEPROM size and layout information structure
+  *
+  * Refreshes the link info, then classifies the plugged module from
+  * hw->phy.link_info.module_type[0] and fills in @modinfo->type and
+  * @modinfo->eeprom_len to match.
+  *
+  * Returns 0 on success, -EINVAL if the I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE
+  * flag is not set, no module is connected or the module type is not
+  * recognized, and -EIO if the link info update or a module register read
+  * fails.
+  **/
+ static int i40e_get_module_info(struct net_device *netdev,
+                               struct ethtool_modinfo *modinfo)
+ {
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u32 sff8472_comp = 0;
+       u32 sff8472_swap = 0;
+       u32 sff8636_rev = 0;
+       i40e_status status;
+       u32 type = 0;
+ 
+       /* Check if firmware supports reading module EEPROM. */
+       if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) {
+               netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n");
+               return -EINVAL;
+       }
+ 
+       status = i40e_update_link_info(hw);
+       if (status)
+               return -EIO;
+ 
+       if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) {
+               netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n");
+               return -EINVAL;
+       }
+ 
+       type = hw->phy.link_info.module_type[0];
+ 
+       switch (type) {
+       case I40E_MODULE_TYPE_SFP:
+               status = i40e_aq_get_phy_register(hw,
+                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+                               I40E_I2C_EEPROM_DEV_ADDR,
+                               I40E_MODULE_SFF_8472_COMP,
+                               &sff8472_comp, NULL);
+               if (status)
+                       return -EIO;
+ 
+               status = i40e_aq_get_phy_register(hw,
+                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+                               I40E_I2C_EEPROM_DEV_ADDR,
+                               I40E_MODULE_SFF_8472_SWAP,
+                               &sff8472_swap, NULL);
+               if (status)
+                       return -EIO;
+ 
+               /* Check if the module requires address swap to access
+                * the other EEPROM memory page. If it does, or if the
+                * compliance value read above is 0x00, only the SFF-8079
+                * layout and length are reported.
+                */
+               if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) {
+                       netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n");
+                       modinfo->type = ETH_MODULE_SFF_8079;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+               } else if (sff8472_comp == 0x00) {
+                       /* Module is not SFF-8472 compliant */
+                       modinfo->type = ETH_MODULE_SFF_8079;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+               } else {
+                       modinfo->type = ETH_MODULE_SFF_8472;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+               }
+               break;
+       case I40E_MODULE_TYPE_QSFP_PLUS:
+               /* Read from memory page 0. */
+               status = i40e_aq_get_phy_register(hw,
+                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+                               0,
+                               I40E_MODULE_REVISION_ADDR,
+                               &sff8636_rev, NULL);
+               if (status)
+                       return -EIO;
+               /* Determine revision compliance byte; both outcomes report
+                * the same EEPROM length, only the type differs.
+                */
+               if (sff8636_rev > 0x02) {
+                       /* Module is SFF-8636 compliant */
+                       modinfo->type = ETH_MODULE_SFF_8636;
+                       modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+               } else {
+                       modinfo->type = ETH_MODULE_SFF_8436;
+                       modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+               }
+               break;
+       case I40E_MODULE_TYPE_QSFP28:
+               modinfo->type = ETH_MODULE_SFF_8636;
+               modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+               break;
+       default:
+               netdev_err(vsi->netdev, "Module type unrecognized\n");
+               return -EINVAL;
+       }
+       return 0;
+ }
+ 
+ /**
+  * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents
+  * @netdev: network interface device structure
+  * @ee: EEPROM dump request structure
+  * @data: buffer to be filled with EEPROM contents
+  *
+  * Copies @ee->len bytes starting at @ee->offset into @data, issuing one
+  * i40e_aq_get_phy_register() call per byte. The module type is taken from
+  * hw->phy.link_info.module_type[0] as it currently stands; this function
+  * does not refresh the link info itself.
+  *
+  * For SFP modules, offsets at or beyond ETH_MODULE_SFF_8079_LEN are read
+  * from I40E_I2C_EEPROM_DEV_ADDR2 instead of I40E_I2C_EEPROM_DEV_ADDR. For
+  * all other modules the address argument starts at 0 and is incremented
+  * once for every ETH_MODULE_SFF_8436_LEN / 2 that has to be subtracted to
+  * bring the offset below ETH_MODULE_SFF_8436_LEN.
+  *
+  * Returns 0 on success, -EINVAL if @ee or @data is NULL or @ee->len is 0,
+  * and -EIO if a register read fails.
+  **/
+ static int i40e_get_module_eeprom(struct net_device *netdev,
+                                 struct ethtool_eeprom *ee,
+                                 u8 *data)
+ {
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       bool is_sfp = false;
+       i40e_status status;
+       u32 value = 0;
+       int i;
+ 
+       if (!ee || !ee->len || !data)
+               return -EINVAL;
+ 
+       if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP)
+               is_sfp = true;
+ 
+       for (i = 0; i < ee->len; i++) {
+               u32 offset = i + ee->offset;
+               u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0;
+ 
+               /* Check if we need to access the other memory page */
+               if (is_sfp) {
+                       if (offset >= ETH_MODULE_SFF_8079_LEN) {
+                               offset -= ETH_MODULE_SFF_8079_LEN;
+                               addr = I40E_I2C_EEPROM_DEV_ADDR2;
+                       }
+               } else {
+                       while (offset >= ETH_MODULE_SFF_8436_LEN) {
+                               /* Compute memory page number and offset. */
+                               offset -= ETH_MODULE_SFF_8436_LEN / 2;
+                               addr++;
+                       }
+               }
+ 
+               status = i40e_aq_get_phy_register(hw,
+                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+                               addr, offset, &value, NULL);
+               if (status)
+                       return -EIO;
+               data[i] = value; /* u32 value is narrowed to one u8 here */
+       }
+       return 0;
+ }
+ 
   static const struct ethtool_ops i40e_ethtool_ops = {
         .get_drvinfo            = i40e_get_drvinfo,
         .get_regs_len           = i40e_get_regs_len,
@@@ -4228,6 -4549,8 +4549,8 @@@
         .set_rxfh               = i40e_set_rxfh,
         .get_channels           = i40e_get_channels,
         .set_channels           = i40e_set_channels,
+       .get_module_info        = i40e_get_module_info,
+       .get_module_eeprom      = i40e_get_module_eeprom,
         .get_ts_info            = i40e_get_ts_info,
         .get_priv_flags         = i40e_get_priv_flags,
         .set_priv_flags         = i40e_set_priv_flags,
diff --combined drivers/net/ethernet/intel/i40e/i40e_main.c

index de1fcac7834de30173109b38001554b267e697d3,17e6f64299cf94747561fa5fb260faf967a18004..4a964d6e4a9ebcdb7b55b157bb9b6006a5fd2aa8
--- 1/drivers/net/ethernet/intel/i40e/i40e_main.c
--- 2/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@@ -69,6 -69,15 +69,15 @@@ static int i40e_reset(struct i40e_pf *p
   static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
   static void i40e_fdir_sb_setup(struct i40e_pf *pf);
   static int i40e_veb_get_bw_info(struct i40e_veb *veb);
+ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+                                    struct i40e_cloud_filter *filter,
+                                    bool add);
+ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+                                            struct i40e_cloud_filter *filter,
+                                            bool add);
+ static int i40e_get_capabilities(struct i40e_pf *pf,
+                                enum i40e_admin_queue_opc list_type);
+ 
   
   /* i40e_pci_tbl - PCI Device ID Table
    *
@@@ -455,7 -464,7 +464,7 @@@ static void i40e_get_netdev_stats_struc
                 u64 bytes, packets;
                 unsigned int start;
   
- -              tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
+ +              tx_ring = READ_ONCE(vsi->tx_rings[i]);
                 if (!tx_ring)
                         continue;
                 i40e_get_netdev_stats_struct_tx(tx_ring, stats);
@@@ -599,6 -608,20 +608,20 @@@ static void i40e_stat_update32(struct i
                 *stat = (u32)((new_data + BIT_ULL(32)) - *offset);
   }
   
+ /**
+  * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
+  * @hw: ptr to the hardware info
+  * @reg: the hw reg to read and clear
+  * @stat: ptr to the stat
+  *
+  * Reads @reg, writes 1 back to it to clear it, and then adds the value that
+  * was read to *@stat. The stat is therefore a running total kept in
+  * software; no offset bookkeeping is done here.
+  **/
+ static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
+ {
+       u32 new_data = rd32(hw, reg);
+ 
+       wr32(hw, reg, 1); /* must write a nonzero value to clear register */
+       *stat += new_data;
+ }
+ 
   /**
    * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
    * @vsi: the VSI to be updated
@@@ -791,7 -814,7 +814,7 @@@ static void i40e_update_vsi_stats(struc
         rcu_read_lock();
         for (q = 0; q < vsi->num_queue_pairs; q++) {
                 /* locate Tx ring */
- -              p = ACCESS_ONCE(vsi->tx_rings[q]);
+ +              p = READ_ONCE(vsi->tx_rings[q]);
   
                 do {
                         start = u64_stats_fetch_begin_irq(&p->syncp);
@@@ -1040,18 -1063,15 +1063,15 @@@ static void i40e_update_pf_stats(struc
                            &osd->rx_jabber, &nsd->rx_jabber);
   
         /* FDIR stats */
-       i40e_stat_update32(hw,
-                          I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)),
-                          pf->stat_offsets_loaded,
-                          &osd->fd_atr_match, &nsd->fd_atr_match);
-       i40e_stat_update32(hw,
-                          I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)),
-                          pf->stat_offsets_loaded,
-                          &osd->fd_sb_match, &nsd->fd_sb_match);
-       i40e_stat_update32(hw,
-                     I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)),
-                     pf->stat_offsets_loaded,
-                     &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match);
+       i40e_stat_update_and_clear32(hw,
+                       I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)),
+                       &nsd->fd_atr_match);
+       i40e_stat_update_and_clear32(hw,
+                       I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)),
+                       &nsd->fd_sb_match);
+       i40e_stat_update_and_clear32(hw,
+                       I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)),
+                       &nsd->fd_atr_tunnel_match);
   
         val = rd32(hw, I40E_PRTPM_EEE_STAT);
         nsd->tx_lpi_status =
@@@ -1577,6 -1597,170 +1597,170 @@@ static int i40e_set_mac(struct net_devi
         return 0;
   }
   
+ /**
+  * i40e_config_rss_aq - Prepare for RSS using AQ commands
+  * @vsi: vsi structure
+  * @seed: RSS hash seed, or NULL to skip programming the RSS key
+  * @lut: lookup table to program, or NULL to skip programming the LUT
+  * @lut_size: size of @lut, passed through to i40e_aq_set_rss_lut()
+  *
+  * The key is programmed first, then the LUT; the first failing step logs a
+  * message and ends the function. The pf_lut argument handed to
+  * i40e_aq_set_rss_lut() is true only when @vsi is of type I40E_VSI_MAIN.
+  *
+  * Returns 0 on success (including when both @seed and @lut are NULL),
+  * otherwise the status returned by the failing AQ helper.
+  **/
+ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+                             u8 *lut, u16 lut_size)
+ {
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       int ret = 0;
+ 
+       if (seed) {
+               struct i40e_aqc_get_set_rss_key_data *seed_dw =
+                       (struct i40e_aqc_get_set_rss_key_data *)seed;
+               ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Cannot set RSS key, err %s aq_err %s\n",
+                                i40e_stat_str(hw, ret),
+                                i40e_aq_str(hw, hw->aq.asq_last_status));
+                       return ret;
+               }
+       }
+       if (lut) {
+               bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
+ 
+               ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Cannot set RSS lut, err %s aq_err %s\n",
+                                i40e_stat_str(hw, ret),
+                                i40e_aq_str(hw, hw->aq.asq_last_status));
+                       return ret;
+               }
+       }
+       return ret;
+ }
+ 
+ /**
+  * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
+  * @vsi: VSI structure
+  *
+  * Builds a hash key and a lookup table of vsi->rss_table_size bytes and
+  * programs both through i40e_config_rss_aq(). If vsi->rss_size is 0 on
+  * entry it is first set to the smaller of pf->alloc_rss_size and
+  * vsi->num_queue_pairs. The temporary LUT buffer is freed before returning.
+  *
+  * Returns 0 without doing anything when pf->hw_features lacks
+  * I40E_HW_RSS_AQ_CAPABLE, -EINVAL if the RSS size is still 0 after the
+  * fallback, -ENOMEM if the LUT buffer cannot be allocated, otherwise the
+  * result of i40e_config_rss_aq().
+  **/
+ static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
+ {
+       struct i40e_pf *pf = vsi->back;
+       u8 seed[I40E_HKEY_ARRAY_SIZE];
+       u8 *lut;
+       int ret;
+ 
+       if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
+               return 0;
+       if (!vsi->rss_size)
+               vsi->rss_size = min_t(int, pf->alloc_rss_size,
+                                     vsi->num_queue_pairs);
+       if (!vsi->rss_size)
+               return -EINVAL;
+       lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+       if (!lut)
+               return -ENOMEM;
+ 
+       /* Use the user configured hash keys and lookup table if there is one,
+        * otherwise use default: a LUT from i40e_fill_rss_lut() and a key
+        * from netdev_rss_key_fill().
+        */
+       if (vsi->rss_lut_user)
+               memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+       else
+               i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+       if (vsi->rss_hkey_user)
+               memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+       else
+               netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+       ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+       kfree(lut);
+       return ret;
+ }
+ 
+ /**
+  * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
+  * @vsi: the VSI being configured,
+  * @ctxt: VSI context structure
+  * @enabled_tc: bitmap of traffic classes to enable (each TC i is tested
+  *              with BIT(i)); 0 is treated as "TC0 only"
+  *
+  * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+  *
+  * Besides vsi->tc_config this updates vsi->num_queue_pairs and
+  * vsi->rss_size, sets vsi->reconfig_rss, fills the queue mapping fields of
+  * @ctxt->info, and may set vsi->cnt_q_avail and vsi->next_base_queue.
+  *
+  * Returns 0 on success, -EINVAL if @vsi is not of type I40E_VSI_MAIN, or
+  * the error returned by i40e_vsi_config_rss().
+  **/
+ static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
+                                          struct i40e_vsi_context *ctxt,
+                                          u8 enabled_tc)
+ {
+       u16 qcount = 0, max_qcount, qmap, sections = 0;
+       int i, override_q, pow, num_qps, ret;
+       u8 netdev_tc = 0, offset = 0;
+ 
+       if (vsi->type != I40E_VSI_MAIN)
+               return -EINVAL;
+       sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+       sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+       vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
+       vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+       num_qps = vsi->mqprio_qopt.qopt.count[0];
+ 
+       /* find the next higher power-of-2 of num queue pairs; offset is
+        * still 0 at this point, so qmap is built from the TC0 queue count
+        * with a queue offset of 0
+        */
+       pow = ilog2(num_qps);
+       if (!is_power_of_2(num_qps))
+               pow++;
+       qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+               (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+ 
+       /* Setup queue offset/count for all TCs for given VSI */
+       max_qcount = vsi->mqprio_qopt.qopt.count[0];
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               /* See if the given TC is enabled for the given VSI */
+               if (vsi->tc_config.enabled_tc & BIT(i)) {
+                       offset = vsi->mqprio_qopt.qopt.offset[i];
+                       qcount = vsi->mqprio_qopt.qopt.count[i];
+                       if (qcount > max_qcount)
+                               max_qcount = qcount;
+                       vsi->tc_config.tc_info[i].qoffset = offset;
+                       vsi->tc_config.tc_info[i].qcount = qcount;
+                       vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
+               } else {
+                       /* TC is not enabled so set the offset to
+                        * default queue and allocate one queue
+                        * for the given TC.
+                        */
+                       vsi->tc_config.tc_info[i].qoffset = 0;
+                       vsi->tc_config.tc_info[i].qcount = 1;
+                       vsi->tc_config.tc_info[i].netdev_tc = 0;
+               }
+       }
+ 
+       /* Set actual Tx/Rx queue pairs; offset and qcount still hold the
+        * values of the highest-numbered enabled TC visited by the loop
+        */
+       vsi->num_queue_pairs = offset + qcount;
+ 
+       /* Setup queue TC[0].qmap for given VSI context */
+       ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+       ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+       ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+       ctxt->info.valid_sections |= cpu_to_le16(sections);
+ 
+       /* Reconfigure RSS for main VSI with max queue count */
+       vsi->rss_size = max_qcount;
+       ret = i40e_vsi_config_rss(vsi);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed to reconfig rss for num_queues (%u)\n",
+                        max_qcount);
+               return ret;
+       }
+       vsi->reconfig_rss = true;
+       dev_dbg(&vsi->back->pdev->dev,
+               "Reconfigured rss with num_queues (%u)\n", max_qcount);
+ 
+       /* Find queue count available for channel VSIs and starting offset
+        * for channel VSIs: everything past the TC0 queue count, provided
+        * that count is nonzero and smaller than num_queue_pairs
+        */
+       override_q = vsi->mqprio_qopt.qopt.count[0];
+       if (override_q && override_q < vsi->num_queue_pairs) {
+               vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
+               vsi->next_base_queue = override_q;
+       }
+       return 0;
+ }
+ 
   /**
    * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
    * @vsi: the VSI being setup
@@@ -1615,7 -1799,7 +1799,7 @@@ static void i40e_vsi_setup_queue_map(st
                         numtc = 1;
                 }
         } else {
-               /* At least TC0 is enabled in case of non-DCB case */
+               /* At least TC0 is enabled in non-DCB, non-MQPRIO case */
                 numtc = 1;
         }
   
@@@ -1765,11 -1949,6 +1949,6 @@@ static void i40e_set_rx_mode(struct net
                 vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
                 vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
         }
- 
-       /* schedule our worker thread which will take care of
-        * applying the new filter changes
-        */
-       i40e_service_event_schedule(vsi->back);
   }
   
   /**
@@@ -2873,22 -3052,18 +3052,18 @@@ static void i40e_vsi_free_rx_resources(
    **/
   static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
   {
-       struct i40e_vsi *vsi = ring->vsi;
+       int cpu;
   
-       if (!ring->q_vector || !ring->netdev)
+       if (!ring->q_vector || !ring->netdev || ring->ch)
                 return;
   
-       if ((vsi->tc_config.numtc <= 1) &&
-           !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) {
-               netif_set_xps_queue(ring->netdev,
-                                   get_cpu_mask(ring->q_vector->v_idx),
-                                   ring->queue_index);
-       }
+       /* We only initialize XPS once, so as not to overwrite user settings */
+       if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
+               return;
   
-       /* schedule our worker thread which will take care of
-        * applying the new filter changes
-        */
-       i40e_service_event_schedule(vsi->back);
+       cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
+       netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
+                           ring->queue_index);
   }
   
   /**
@@@ -2942,7 -3117,14 +3117,14 @@@ static int i40e_configure_tx_ring(struc
          * initialization. This has to be done regardless of
          * DCB as by default everything is mapped to TC0.
          */
-       tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+ 
+       if (ring->ch)
+               tx_ctx.rdylist =
+                       le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
+ 
+       else
+               tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+ 
         tx_ctx.rdylist_act = 0;
   
         /* clear the context in the HMC */
@@@ -2964,12 -3146,23 +3146,23 @@@
         }
   
         /* Now associate this queue with this PCI function */
-       if (vsi->type == I40E_VSI_VMDQ2) {
-               qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
-               qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
-                          I40E_QTX_CTL_VFVM_INDX_MASK;
+       if (ring->ch) {
+               if (ring->ch->type == I40E_VSI_VMDQ2)
+                       qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+               else
+                       return -EINVAL;
+ 
+               qtx_ctl |= (ring->ch->vsi_number <<
+                           I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+                           I40E_QTX_CTL_VFVM_INDX_MASK;
         } else {
-               qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+               if (vsi->type == I40E_VSI_VMDQ2) {
+                       qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+                       qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+                                   I40E_QTX_CTL_VFVM_INDX_MASK;
+               } else {
+                       qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+               }
         }
   
         qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
@@@ -2998,7 -3191,7 +3191,7 @@@ static int i40e_configure_rx_ring(struc
         struct i40e_hmc_obj_rxq rx_ctx;
         i40e_status err = 0;
   
-       ring->state = 0;
+       bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
   
         /* clear the context structure first */
         memset(&rx_ctx, 0, sizeof(rx_ctx));
@@@ -3023,7 -3216,7 +3216,7 @@@
         if (hw->revision_id == 0)
                 rx_ctx.lrxqthresh = 0;
         else
-               rx_ctx.lrxqthresh = 2;
+               rx_ctx.lrxqthresh = 1;
         rx_ctx.crcstrip = 1;
         rx_ctx.l2tsel = 1;
         /* this controls whether VLAN is stripped from inner headers */
@@@ -3138,6 -3331,7 +3331,7 @@@ static void i40e_vsi_config_dcb_rings(s
                         rx_ring->dcb_tc = 0;
                         tx_ring->dcb_tc = 0;
                 }
+               return;
         }
   
         for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
@@@ -3396,15 -3590,14 +3590,14 @@@ void i40e_irq_dynamic_disable_icr0(stru
   /**
    * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
    * @pf: board private structure
-  * @clearpba: true when all pending interrupt events should be cleared
    **/
- void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba)
+ void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
   {
         struct i40e_hw *hw = &pf->hw;
         u32 val;
   
         val = I40E_PFINT_DYN_CTL0_INTENA_MASK   |
-             (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) |
+             I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
               (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
   
         wr32(hw, I40E_PFINT_DYN_CTL0, val);
@@@ -3471,6 -3664,7 +3664,7 @@@ static int i40e_vsi_request_irq_msix(st
         int tx_int_idx = 0;
         int vector, err;
         int irq_num;
+       int cpu;
   
         for (vector = 0; vector < q_vectors; vector++) {
                 struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
@@@ -3506,10 -3700,14 +3700,14 @@@
                 q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
                 q_vector->affinity_notify.release = i40e_irq_affinity_release;
                 irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
-               /* get_cpu_mask returns a static constant mask with
-                * a permanent lifetime so it's ok to use here.
+               /* Spread affinity hints out across online CPUs.
+                *
+                * get_cpu_mask returns a static constant mask with
+                * a permanent lifetime so it's ok to pass to
+                * irq_set_affinity_hint without making a copy.
                  */
-               irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
+               cpu = cpumask_local_spread(q_vector->v_idx, -1);
+               irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
         }
   
         vsi->irqs_ready = true;
@@@ -3585,7 -3783,7 +3783,7 @@@ static int i40e_vsi_enable_irq(struct i
                 for (i = 0; i < vsi->num_q_vectors; i++)
                         i40e_irq_dynamic_enable(vsi, i);
         } else {
-               i40e_irq_dynamic_enable_icr0(pf, true);
+               i40e_irq_dynamic_enable_icr0(pf);
         }
   
         i40e_flush(&pf->hw);
@@@ -3593,14 -3791,20 +3791,20 @@@
   }
   
   /**
-  * i40e_stop_misc_vector - Stop the vector that handles non-queue events
+  * i40e_free_misc_vector - Free the vector that handles non-queue events
    * @pf: board private structure
    **/
- static void i40e_stop_misc_vector(struct i40e_pf *pf)
+ static void i40e_free_misc_vector(struct i40e_pf *pf)
   {
         /* Disable ICR 0 */
         wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
         i40e_flush(&pf->hw);
+ 
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
+               synchronize_irq(pf->msix_entries[0].vector);
+               free_irq(pf->msix_entries[0].vector, pf);
+               clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
+       }
   }
   
   /**
@@@ -3728,7 -3932,7 +3932,7 @@@ enable_intr
         wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
         if (!test_bit(__I40E_DOWN, pf->state)) {
                 i40e_service_event_schedule(pf);
-               i40e_irq_dynamic_enable_icr0(pf, false);
+               i40e_irq_dynamic_enable_icr0(pf);
         }
   
         return ret;
@@@ -4455,11 -4659,7 +4659,7 @@@ static void i40e_clear_interrupt_scheme
   {
         int i;
   
-       i40e_stop_misc_vector(pf);
-       if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
-               synchronize_irq(pf->msix_entries[0].vector);
-               free_irq(pf->msix_entries[0].vector, pf);
-       }
+       i40e_free_misc_vector(pf);
   
         i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
                       I40E_IWARP_IRQ_PILE_ID);
@@@ -4847,6 -5047,24 +5047,24 @@@ static u8 i40e_dcb_get_enabled_tc(struc
         return enabled_tc;
   }
   
+ /**
+  * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
+  * @pf: PF being queried
+  *
+  * Query the current MQPRIO configuration of the LAN VSI and return the
+  * enabled traffic classes as a bitmap: bits 0 through num_tc - 1 are set,
+  * and bit 0 is always set even when num_tc is 0.
+  **/
+ static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
+ {
+       struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+       u8 num_tc = vsi->mqprio_qopt.qopt.num_tc;
+       u8 enabled_tc = 1, i;
+ 
+       for (i = 1; i < num_tc; i++)
+               enabled_tc |= BIT(i);
+       return enabled_tc;
+ }
+ 
   /**
    * i40e_pf_get_num_tc - Get enabled traffic classes for PF
    * @pf: PF being queried
@@@ -4860,7 -5078,10 +5078,10 @@@ static u8 i40e_pf_get_num_tc(struct i40
         u8 num_tc = 0;
         struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
   
-       /* If DCB is not enabled then always in single TC */
+       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+               return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
+ 
+       /* If neither MQPRIO nor DCB is enabled, then always use single TC */
         if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
                 return 1;
   
@@@ -4889,7 -5110,12 +5110,12 @@@
    **/
   static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
   {
-       /* If DCB is not enabled for this PF then just return default TC */
+       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+               return i40e_mqprio_get_enabled_tc(pf);
+ 
+       /* If neither MQPRIO nor DCB is enabled for this PF then just return
+        * default TC
+        */
         if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
                 return I40E_DEFAULT_TRAFFIC_CLASS;
   
@@@ -4979,6 -5205,16 +5205,16 @@@ static int i40e_vsi_configure_bw_alloc(
         i40e_status ret;
         int i;
   
+       if (vsi->back->flags & I40E_FLAG_TC_MQPRIO)
+               return 0;
+       if (!vsi->mqprio_qopt.qopt.hw) {
+               ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+               if (ret)
+                       dev_info(&vsi->back->pdev->dev,
+                                "Failed to reset tx rate for vsi->seid %u\n",
+                                vsi->seid);
+               return ret;
+       }
         bw_data.tc_valid_bits = enabled_tc;
         for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
                 bw_data.tc_bw_credits[i] = bw_share[i];
@@@ -5041,6 -5277,9 +5277,9 @@@ static void i40e_vsi_config_netdev_tc(s
                                         vsi->tc_config.tc_info[i].qoffset);
         }
   
+       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+               return;
+ 
         /* Assign UP2TC map for the VSI */
         for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
                 /* Get the actual TC# for the UP */
@@@ -5091,7 -5330,8 +5330,8 @@@ static int i40e_vsi_config_tc(struct i4
         int i;
   
         /* Check if enabled_tc is same as existing or new TCs */
-       if (vsi->tc_config.enabled_tc == enabled_tc)
+       if (vsi->tc_config.enabled_tc == enabled_tc &&
+           vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
                 return ret;
   
         /* Enable ETS TCs with equal BW Share for now across all VSIs */
@@@ -5114,15 -5354,37 +5354,37 @@@
         ctxt.vf_num = 0;
         ctxt.uplink_seid = vsi->uplink_seid;
         ctxt.info = vsi->info;
-       i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+       if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) {
+               ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
+               if (ret)
+                       goto out;
+       } else {
+               i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+       }
   
+       /* On destroying the qdisc, reset vsi->rss_size, as number of enabled
+        * queues changed.
+        */
+       if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
+               vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
+                                     vsi->num_queue_pairs);
+               ret = i40e_vsi_config_rss(vsi);
+               if (ret) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "Failed to reconfig rss for num_queues\n");
+                       return ret;
+               }
+               vsi->reconfig_rss = false;
+       }
         if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
                 ctxt.info.valid_sections |=
                                 cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
                 ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
         }
   
-       /* Update the VSI after updating the VSI queue-mapping information */
+       /* Update the VSI after updating the VSI queue-mapping
+        * information
+        */
         ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
         if (ret) {
                 dev_info(&vsi->back->pdev->dev,
@@@ -5154,482 -5416,2147 +5416,2147 @@@ out
   }
   
   /**
-  * i40e_veb_config_tc - Configure TCs for given VEB
-  * @veb: given VEB
-  * @enabled_tc: TC bitmap
+  * i40e_get_link_speed - Returns link speed for the interface
+  * @vsi: VSI whose PF link information is read
    *
-  * Configures given TC bitmap for VEB (switching) element
+  * Returns 40000, 25000, 20000, 10000 or 1000 for the matching
+  * I40E_LINK_SPEED_* value (presumably Mbps -- confirm), or -EINVAL for any
+  * other value of hw.phy.link_info.link_speed.
    **/
- int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+ int i40e_get_link_speed(struct i40e_vsi *vsi)
   {
-       struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
-       struct i40e_pf *pf = veb->pf;
-       int ret = 0;
-       int i;
- 
-       /* No TCs or already enabled TCs just return */
-       if (!enabled_tc || veb->enabled_tc == enabled_tc)
-               return ret;
- 
-       bw_data.tc_valid_bits = enabled_tc;
-       /* bw_data.absolute_credits is not set (relative) */
- 
-       /* Enable ETS TCs with equal BW Share for now */
-       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-               if (enabled_tc & BIT(i))
-                       bw_data.tc_bw_share_credits[i] = 1;
-       }
- 
-       ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
-                                                  &bw_data, NULL);
-       if (ret) {
-               dev_info(&pf->pdev->dev,
-                        "VEB bw config failed, err %s aq_err %s\n",
-                        i40e_stat_str(&pf->hw, ret),
-                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-               goto out;
-       }
+       struct i40e_pf *pf = vsi->back;
   
-       /* Update the BW information */
-       ret = i40e_veb_get_bw_info(veb);
-       if (ret) {
-               dev_info(&pf->pdev->dev,
-                        "Failed getting veb bw config, err %s aq_err %s\n",
-                        i40e_stat_str(&pf->hw, ret),
-                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+       switch (pf->hw.phy.link_info.link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               return 40000;
+       case I40E_LINK_SPEED_25GB:
+               return 25000;
+       case I40E_LINK_SPEED_20GB:
+               return 20000;
+       case I40E_LINK_SPEED_10GB:
+               return 10000;
+       case I40E_LINK_SPEED_1GB:
+               return 1000;
+       default:
+               return -EINVAL;
         }
- 
- out:
-       return ret;
   }
   
- #ifdef CONFIG_I40E_DCB
   /**
-  * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
-  * @pf: PF struct
+  * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+  * @vsi: VSI to be configured
+  * @seid: seid of the channel/VSI
+  * @max_tx_rate: max TX rate to be configured as BW limit
    *
-  * Reconfigure VEB/VSIs on a given PF; it is assumed that
-  * the caller would've quiesce all the VSIs before calling
-  * this function
+  * Helper function to set BW limit for a given VSI
    **/
- static void i40e_dcb_reconfigure(struct i40e_pf *pf)
+ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
   {
-       u8 tc_map = 0;
-       int ret;
-       u8 v;
+       struct i40e_pf *pf = vsi->back;
+       u64 credits = 0;
+       int speed = 0;
+       int ret = 0;
   
-       /* Enable the TCs available on PF to all VEBs */
-       tc_map = i40e_pf_get_tc_map(pf);
-       for (v = 0; v < I40E_MAX_VEB; v++) {
-               if (!pf->veb[v])
+       speed = i40e_get_link_speed(vsi);
+       if (max_tx_rate > speed) {
+               dev_err(&pf->pdev->dev,
+                       "Invalid max tx rate %llu specified for VSI seid %d.",
+                       max_tx_rate, seid);
+               return -EINVAL;
+       }
+       if (max_tx_rate && max_tx_rate < 50) {
+               dev_warn(&pf->pdev->dev,
+                        "Setting max tx rate to minimum usable value of 50Mbps.\n");
+               max_tx_rate = 50;
+       }
+ 
+       /* Tx rate credits are in values of 50Mbps, 0 is disabled */
+       credits = max_tx_rate;
+       do_div(credits, I40E_BW_CREDIT_DIVISOR);
+       ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
+                                         I40E_MAX_BW_INACTIVE_ACCUM, NULL);
+       if (ret)
+               dev_err(&pf->pdev->dev,
+                       "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n",
+                       max_tx_rate, seid, i40e_stat_str(&pf->hw, ret),
+                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+       return ret;
+ }
+ 
+ /**
+  * i40e_remove_queue_channels - Remove queue channels for the TCs
+  * @vsi: VSI whose channel list (ch_list) is torn down
+  *
+  * Clears current_rss_size and then unlinks and frees every channel on the
+  * VSI's channel list. For channels that have a parent VSI and are marked
+  * initialized it additionally detaches the channel from its Tx/Rx rings,
+  * resets the channel's BW limit, deletes the cloud filters whose seid
+  * matches the channel and deletes the channel's VSI element. Failures of
+  * these steps are only logged.
+  **/
+ static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
+ {
+       enum i40e_admin_queue_err last_aq_status;
+       struct i40e_cloud_filter *cfilter;
+       struct i40e_channel *ch, *ch_tmp;
+       struct i40e_pf *pf = vsi->back;
+       struct hlist_node *node;
+       int ret, i;
+ 
+       /* Reset rss size that was stored when reconfiguring rss for
+        * channel VSIs with non-power-of-2 queue count.
+        */
+       vsi->current_rss_size = 0;
+ 
+       /* perform cleanup for channels if they exist */
+       if (list_empty(&vsi->ch_list))
+               return;
+ 
+       list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+               struct i40e_vsi *p_vsi;
+ 
+               list_del(&ch->list);
+               p_vsi = ch->parent_vsi;
+               if (!p_vsi || !ch->initialized) {
+                       kfree(ch);
                         continue;
-               ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+               }
+               /* Reset queue contexts: clear the back-pointer to this
+                * channel on every Tx/Rx ring in the channel's queue range.
+                */
+               for (i = 0; i < ch->num_queue_pairs; i++) {
+                       struct i40e_ring *tx_ring, *rx_ring;
+                       u16 pf_q;
+ 
+                       pf_q = ch->base_queue + i;
+                       tx_ring = vsi->tx_rings[pf_q];
+                       tx_ring->ch = NULL;
+ 
+                       rx_ring = vsi->rx_rings[pf_q];
+                       rx_ring->ch = NULL;
+               }
+ 
+               /* Reset BW configured for this VSI via mqprio; a failure is
+                * logged and the teardown continues.
+                */
+               ret = i40e_set_bw_limit(vsi, ch->seid, 0);
+               if (ret)
+                       dev_info(&vsi->back->pdev->dev,
+                                "Failed to reset tx rate for ch->seid %u\n",
+                                ch->seid);
+ 
+               /* delete cloud filters associated with this channel; a
+                * filter with a dst_port goes through the big_buf variant.
+                * The filter is unhashed and freed even if the delete fails.
+                */
+               hlist_for_each_entry_safe(cfilter, node,
+                                         &pf->cloud_filter_list, cloud_node) {
+                       if (cfilter->seid != ch->seid)
+                               continue;
+ 
+                       hash_del(&cfilter->cloud_node);
+                       if (cfilter->dst_port)
+                               ret = i40e_add_del_cloud_filter_big_buf(vsi,
+                                                                       cfilter,
+                                                                       false);
+                       else
+                               ret = i40e_add_del_cloud_filter(vsi, cfilter,
+                                                               false);
+                       last_aq_status = pf->hw.aq.asq_last_status;
+                       if (ret)
+                               dev_info(&pf->pdev->dev,
+                                        "Failed to delete cloud filter, err %s aq_err %s\n",
+                                        i40e_stat_str(&pf->hw, ret),
+                                        i40e_aq_str(&pf->hw, last_aq_status));
+                       kfree(cfilter);
+               }
+ 
+               /* delete VSI from FW; the channel is freed regardless of the
+                * outcome.
+                */
+               ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+                                            NULL);
+               if (ret)
+                       dev_err(&vsi->back->pdev->dev,
+                               "unable to remove channel (%d) for parent VSI(%d)\n",
+                               ch->seid, p_vsi->seid);
+               kfree(ch);
+       }
+       INIT_LIST_HEAD(&vsi->ch_list);
+ }
+ 
+ /**
+  * i40e_is_any_channel - check whether any channel is set up on a VSI
+  * @vsi: ptr to VSI whose channel list is inspected
+  *
+  * Returns true if at least one channel on the VSI's channel list is marked
+  * initialized, false otherwise.
+  **/
+ static bool i40e_is_any_channel(struct i40e_vsi *vsi)
+ {
+       struct i40e_channel *cur, *next;
+       bool found = false;
+ 
+       list_for_each_entry_safe(cur, next, &vsi->ch_list, list) {
+               if (!cur->initialized)
+                       continue;
+               found = true;
+               break;
+       }
+ 
+       return found;
+ }
+ 
+ /**
+  * i40e_get_max_queues_for_channel - largest queue count among channels
+  * @vsi: ptr to VSI to which channels are associated with
+  *
+  * Walks the VSI's channel list and returns the highest num_queue_pairs found
+  * on a channel marked initialized, or 0 if there is no such channel.
+  **/
+ static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
+ {
+       struct i40e_channel *cur, *next;
+       int largest = 0;
+ 
+       list_for_each_entry_safe(cur, next, &vsi->ch_list, list) {
+               if (cur->initialized && cur->num_queue_pairs > largest)
+                       largest = cur->num_queue_pairs;
+       }
+ 
+       return largest;
+ }
+ 
+ /**
+  * i40e_validate_num_queues - validate num_queues w.r.t channel
+  * @pf: ptr to PF device
+  * @num_queues: number of queues requested for the new channel
+  * @vsi: the parent VSI
+  * @reconfig_rss: output; set to true when RSS of the parent VSI should be
+  *              reconfigured, false otherwise
+  *
+  * This function validates number of queues in the context of new channel
+  * which is being established and determines if RSS should be reconfigured
+  * or not for parent VSI.
+  *
+  * Returns 0 if @num_queues is acceptable, -EINVAL otherwise (including when
+  * @reconfig_rss is NULL).
+  **/
+ static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
+                                   struct i40e_vsi *vsi, bool *reconfig_rss)
+ {
+       int max_ch_queues;
+ 
+       if (!reconfig_rss)
+               return -EINVAL;
+ 
+       *reconfig_rss = false;
+ 
+       /* The limit checked (and printed) here is the per-channel one, so
+        * the message names I40E_MAX_QUEUES_PER_CH.
+        */
+       if (num_queues > I40E_MAX_QUEUES_PER_CH) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to create VMDq VSI. User requested num_queues (%d) > I40E_MAX_QUEUES_PER_CH (%u)\n",
+                       num_queues, I40E_MAX_QUEUES_PER_CH);
+               return -EINVAL;
+       }
+ 
+       /* Once RSS was reconfigured for a channel (current_rss_size set), a
+        * new channel may not exceed that size and, if smaller, must be a
+        * power of 2.
+        */
+       if (vsi->current_rss_size) {
+               if (num_queues > vsi->current_rss_size) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Error: num_queues (%d) > vsi's current_size(%d)\n",
+                               num_queues, vsi->current_rss_size);
+                       return -EINVAL;
+               } else if ((num_queues < vsi->current_rss_size) &&
+                          (!is_power_of_2(num_queues))) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
+                               num_queues, vsi->current_rss_size);
+                       return -EINVAL;
+               }
+       }
+ 
+       if (!is_power_of_2(num_queues)) {
+               /* Find the max num_queues configured for channel if channel
+                * exist.
+                * if channel exist, then enforce 'num_queues' to be at least
+                * the max queues configured for any existing channel.
+                */
+               max_ch_queues = i40e_get_max_queues_for_channel(vsi);
+               if (num_queues < max_ch_queues) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Error: num_queues (%d) < max queues configured for channel(%d)\n",
+                               num_queues, max_ch_queues);
+                       return -EINVAL;
+               }
+               *reconfig_rss = true;
+       }
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_vsi_reconfig_rss - reprogram the RSS LUT for a given rss_size
+  * @vsi: the VSI being setup
+  * @rss_size: size of RSS the LUT gets filled for
+  *
+  * Fills a fresh LUT for 'rss_size' (a user configured LUT is ignored),
+  * programs it together with the user hash key (or the default key) and, on
+  * success, records the sizes in orig_rss_size/current_rss_size.
+  *
+  * Returns 0 on success, -EINVAL if the VSI has no rss_size or 'rss_size'
+  * exceeds it, -ENOMEM if the LUT cannot be allocated, or the error from
+  * i40e_config_rss().
+  **/
+ static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
+ {
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u8 hkey[I40E_HKEY_ARRAY_SIZE];
+       int new_size, err;
+       u8 *table;
+ 
+       if (!vsi->rss_size || rss_size > vsi->rss_size)
+               return -EINVAL;
+ 
+       new_size = min_t(int, vsi->rss_size, rss_size);
+       table = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+       if (!table)
+               return -ENOMEM;
+ 
+       /* A user configured lut, if any, is deliberately not reused */
+       i40e_fill_rss_lut(pf, table, vsi->rss_table_size, new_size);
+ 
+       /* Prefer the user's hash key; fall back to the default one */
+       if (vsi->rss_hkey_user)
+               memcpy(hkey, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+       else
+               netdev_rss_key_fill((void *)hkey, I40E_HKEY_ARRAY_SIZE);
+ 
+       /* The table is no longer needed once it has been programmed */
+       err = i40e_config_rss(vsi, hkey, table, vsi->rss_table_size);
+       kfree(table);
+       if (err) {
+               dev_info(&pf->pdev->dev,
+                        "Cannot set RSS lut, err %s aq_err %s\n",
+                        i40e_stat_str(hw, err),
+                        i40e_aq_str(hw, hw->aq.asq_last_status));
+               return err;
+       }
+ 
+       /* Remember the original size once, and the size now in effect */
+       if (!vsi->orig_rss_size)
+               vsi->orig_rss_size = vsi->rss_size;
+       vsi->current_rss_size = new_size;
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_channel_setup_queue_map - Setup a channel queue map
+  * @pf: ptr to PF device
+  * @ctxt: VSI context structure
+  * @ch: ptr to channel structure
+  *
+  * Clamps the channel's queue count to pf->num_lan_msix (writing the result
+  * back to @ch) and fills the TC0 queue mapping of @ctxt: contiguous queues
+  * starting at the channel's base queue, sized to the next power of 2.
+  **/
+ static void i40e_channel_setup_queue_map(struct i40e_pf *pf,
+                                        struct i40e_vsi_context *ctxt,
+                                        struct i40e_channel *ch)
+ {
+       u16 valid = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID |
+                   I40E_AQ_VSI_PROP_SCHED_VALID;
+       u16 nqueues, tc0_qmap;
+       u8 tc0_offset = 0;
+       int order;
+ 
+       nqueues = min_t(int, ch->num_queue_pairs, pf->num_lan_msix);
+       ch->num_queue_pairs = nqueues;
+ 
+       /* exponent of the next power-of-2 at or above the queue count */
+       order = ilog2(nqueues);
+       if (!is_power_of_2(nqueues))
+               order++;
+ 
+       tc0_qmap = (tc0_offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+                  (order << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+ 
+       /* Only TC0 is mapped for a channel VSI */
+       ctxt->info.tc_mapping[0] = cpu_to_le16(tc0_qmap);
+       ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */
+ 
+       ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+       ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue);
+       ctxt->info.valid_sections |= cpu_to_le16(valid);
+ }
+ 
+ /**
+  * i40e_add_channel - add a channel by adding VSI
+  * @pf: ptr to PF device
+  * @uplink_seid: underlying HW switching element (VEB) ID
+  * @ch: ptr to channel structure
+  *
+  * Add a channel (VSI) using add_vsi and queue_map. On success the seid,
+  * VSI number, stat counter index and queue/TC mapping of the new VSI are
+  * stored in @ch.
+  *
+  * Returns 0 on success, -EINVAL if @ch is not of type I40E_VSI_VMDQ2, or
+  * -ENOENT if the add VSI admin queue command fails.
+  **/
+ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
+                           struct i40e_channel *ch)
+ {
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_vsi_context ctxt;
+       u8 enabled_tc = 0x1; /* TC0 enabled */
+       int ret;
+ 
+       if (ch->type != I40E_VSI_VMDQ2) {
+               dev_info(&pf->pdev->dev,
+                        "add new vsi failed, ch->type %d\n", ch->type);
+               return -EINVAL;
+       }
+ 
+       memset(&ctxt, 0, sizeof(ctxt));
+       ctxt.pf_num = hw->pf_id;
+       ctxt.vf_num = 0;
+       ctxt.uplink_seid = uplink_seid;
+       ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+       /* ch->type is known to be I40E_VSI_VMDQ2 past the check above */
+       ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+ 
+       if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
+               ctxt.info.valid_sections |=
+                    cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+               ctxt.info.switch_id =
+                  cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+       }
+ 
+       /* Set queue map for a given VSI context */
+       i40e_channel_setup_queue_map(pf, &ctxt, ch);
+ 
+       /* Now time to create VSI */
+       ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "add new vsi failed, err %s aq_err %s\n",
+                        i40e_stat_str(&pf->hw, ret),
+                        i40e_aq_str(&pf->hw,
+                                    pf->hw.aq.asq_last_status));
+               return -ENOENT;
+       }
+ 
+       /* Success, update channel. The context info fields are little
+        * endian, so the counter index is converted to CPU order here.
+        */
+       ch->enabled_tc = enabled_tc;
+       ch->seid = ctxt.seid;
+       ch->vsi_number = ctxt.vsi_number;
+       ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx);
+ 
+       /* copy just the sections touched not the entire info
+        * since not all sections are valid as returned by
+        * the add VSI command
+        */
+       ch->info.mapping_flags = ctxt.info.mapping_flags;
+       memcpy(&ch->info.queue_mapping,
+              &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
+       memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping,
+              sizeof(ctxt.info.tc_mapping));
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_channel_config_bw - configure per-TC BW allocation for a channel
+  * @vsi: the parent VSI (used to reach the PF/HW)
+  * @ch: ptr to channel structure
+  * @bw_share: BW share credits, one entry per traffic class
+  *
+  * Sends the channel's enabled TC bitmap and the per-TC credits to the admin
+  * queue for the channel's seid and stores the returned qs_handles in @ch.
+  *
+  * Returns 0 on success, -EINVAL if the admin queue command fails.
+  **/
+ static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
+                                 u8 *bw_share)
+ {
+       struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+       i40e_status ret;
+       int i;
+ 
+       /* Only two members are set explicitly below; zero the rest so no
+        * uninitialized stack bytes are passed to the admin queue.
+        */
+       memset(&bw_data, 0, sizeof(bw_data));
+       bw_data.tc_valid_bits = ch->enabled_tc;
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+               bw_data.tc_bw_credits[i] = bw_share[i];
+ 
+       ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
+                                      &bw_data, NULL);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
+                        vsi->back->hw.aq.asq_last_status, ch->seid);
+               return -EINVAL;
+       }
+ 
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+               ch->info.qs_handle[i] = bw_data.qs_handles[i];
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_channel_config_tx_ring - config TX ring associated with new channel
+  * @pf: ptr to PF device
+  * @vsi: the VSI being setup
+  * @ch: ptr to channel structure
+  *
+  * Configures BW for the channel with an equal share per enabled TC and then
+  * points the parent VSI's TX and RX rings in the channel's queue range at
+  * the channel.
+  *
+  * Returns 0 on success or the error from i40e_channel_config_bw().
+  **/
+ static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
+                                      struct i40e_vsi *vsi,
+                                      struct i40e_channel *ch)
+ {
+       u8 share[I40E_MAX_TRAFFIC_CLASS] = {0};
+       i40e_status status;
+       int tc, q;
+ 
+       /* Every enabled TC gets an equal BW share of 1 for now */
+       for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++)
+               if (ch->enabled_tc & BIT(tc))
+                       share[tc] = 1;
+ 
+       /* configure BW for new VSI */
+       status = i40e_channel_config_bw(vsi, ch, share);
+       if (status) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed configuring TC map %d for channel (seid %u)\n",
+                        ch->enabled_tc, ch->seid);
+               return status;
+       }
+ 
+       /* Attach the parent VSI's rings backing this channel to 'ch' */
+       for (q = 0; q < ch->num_queue_pairs; q++) {
+               u16 ring_idx = ch->base_queue + q;
+ 
+               vsi->tx_rings[ring_idx]->ch = ch;
+               vsi->rx_rings[ring_idx]->ch = ch;
+       }
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_setup_hw_channel - setup new channel
+  * @pf: ptr to PF device
+  * @vsi: the VSI being setup
+  * @ch: ptr to channel structure
+  * @uplink_seid: underlying HW switching element (VEB) ID
+  * @type: type of channel to be created (VMDq2/VF)
+  *
+  * Creates the channel VSI at the parent VSI's next_base_queue, marks the
+  * channel initialized, configures its rings and finally advances
+  * next_base_queue by the channel's queue count.
+  *
+  * Returns 0 on success or the error of the step that failed.
+  **/
+ static inline int i40e_setup_hw_channel(struct i40e_pf *pf,
+                                       struct i40e_vsi *vsi,
+                                       struct i40e_channel *ch,
+                                       u16 uplink_seid, u8 type)
+ {
+       int err;
+ 
+       ch->type = type;
+       ch->base_queue = vsi->next_base_queue;
+       ch->initialized = false;
+ 
+       /* Proceed with creation of channel (VMDq2) VSI */
+       err = i40e_add_channel(pf, uplink_seid, ch);
+       if (err) {
+               dev_info(&pf->pdev->dev,
+                        "failed to add_channel using uplink_seid %u\n",
+                        uplink_seid);
+               return err;
+       }
+ 
+       /* The channel VSI exists from here on */
+       ch->initialized = true;
+ 
+       /* Reconfigure TX queues using QTX_CTL register */
+       err = i40e_channel_config_tx_ring(pf, vsi, ch);
+       if (err) {
+               dev_info(&pf->pdev->dev,
+                        "failed to configure TX rings for channel %u\n",
+                        ch->seid);
+               return err;
+       }
+ 
+       /* The next channel starts right behind this one's queues */
+       vsi->next_base_queue += ch->num_queue_pairs;
+       dev_dbg(&pf->pdev->dev,
+               "Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n",
+               ch->seid, ch->vsi_number, ch->stat_counter_idx,
+               ch->num_queue_pairs,
+               vsi->next_base_queue);
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_setup_channel - setup new channel using uplink element
+  * @pf: ptr to PF device
+  * @vsi: the parent VSI; only I40E_VSI_MAIN is supported
+  * @ch: ptr to channel structure
+  *
+  * Sets up a new VMDq2 channel (VSI) below the uplink switching element of
+  * the PF's LAN VSI.
+  *
+  * Returns true if the channel ends up initialized, false otherwise.
+  **/
+ static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi,
+                              struct i40e_channel *ch)
+ {
+       u16 uplink;
+ 
+       if (vsi->type != I40E_VSI_MAIN) {
+               dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
+                       vsi->type);
+               return false;
+       }
+ 
+       /* underlying switching element */
+       uplink = pf->vsi[pf->lan_vsi]->uplink_seid;
+ 
+       /* create channel (VSI), configure TX rings */
+       if (i40e_setup_hw_channel(pf, vsi, ch, uplink, I40E_VSI_VMDQ2)) {
+               dev_err(&pf->pdev->dev, "failed to setup hw_channel\n");
+               return false;
+       }
+ 
+       return ch->initialized;
+ }
+ 
+ /**
+  * i40e_validate_and_set_switch_mode - sets up switch mode correctly
+  * @vsi: ptr to VSI which has PF backing
+  *
+  * Re-reads the device capabilities and checks the reported switch mode. If
+  * it is already cloud filter mode2 nothing is changed; any other cloud
+  * filter mode is rejected. Otherwise the switch is configured for
+  * non-tunneled cloud filters with both TCP and UDP L4 types.
+  *
+  * Returns 0 on success, -EINVAL if the capabilities cannot be read or the
+  * switch mode is unsupported, or the error from i40e_aq_set_switch_config().
+  **/
+ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
+ {
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u8 sw_cfg_mode;
+       int err;
+ 
+       if (i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities))
+               return -EINVAL;
+ 
+       if (hw->dev_caps.switch_mode) {
+               /* if switch mode is set, support mode2 (non-tunneled for
+                * cloud filter) for now
+                */
+               u32 cur_mode = hw->dev_caps.switch_mode &
+                              I40E_SWITCH_MODE_MASK;
+ 
+               if (cur_mode >= I40E_CLOUD_FILTER_MODE1) {
+                       if (cur_mode != I40E_CLOUD_FILTER_MODE2) {
+                               dev_err(&pf->pdev->dev,
+                                       "Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n",
+                                       hw->dev_caps.switch_mode);
+                               return -EINVAL;
+                       }
+                       return 0;
+               }
+       }
+ 
+       /* Bit 7 valid, L4 type TCP and UDP, non-tunneled cloud filter mode */
+       sw_cfg_mode = I40E_AQ_SET_SWITCH_BIT7_VALID |
+                     I40E_AQ_SET_SWITCH_L4_TYPE_BOTH |
+                     I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
+ 
+       /* Prep mode field for set_switch_config */
+       err = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags,
+                                       pf->last_sw_conf_valid_flags,
+                                       sw_cfg_mode, NULL);
+       if (err && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH)
+               dev_err(&pf->pdev->dev,
+                       "couldn't set switch config bits, err %s aq_err %s\n",
+                       i40e_stat_str(hw, err),
+                       i40e_aq_str(hw,
+                                   hw->aq.asq_last_status));
+ 
+       return err;
+ }
+ 
+ /**
+  * i40e_create_queue_channel - function to create channel
+  * @vsi: VSI to be configured
+  * @ch: ptr to channel (it contains channel specific params)
+  *
+  * This function creates channel (VSI) using num_queues specified by user,
+  * reconfigs RSS if needed. On success the channel's parent_vsi is set to
+  * @vsi and the channel's queues are subtracted from vsi->cnt_q_avail.
+  *
+  * Returns 0 on success, -EINVAL on any failure.
+  **/
+ int i40e_create_queue_channel(struct i40e_vsi *vsi,
+                             struct i40e_channel *ch)
+ {
+       struct i40e_pf *pf = vsi->back;
+       bool reconfig_rss;
+       int err;
+ 
+       if (!ch)
+               return -EINVAL;
+ 
+       if (!ch->num_queue_pairs) {
+               dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
+                       ch->num_queue_pairs);
+               return -EINVAL;
+       }
+ 
+       /* validate user requested num_queues for channel; this also decides
+        * (via reconfig_rss) whether RSS has to be reprogrammed below.
+        */
+       err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
+                                      &reconfig_rss);
+       if (err) {
+               dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
+                        ch->num_queue_pairs);
+               return -EINVAL;
+       }
+ 
+       /* By default we are in VEPA mode, if this is the first VF/VMDq
+        * VSI to be added switch to VEB mode.
+        * The block is also entered when VEB mode is already enabled but no
+        * initialized channel exists yet; the PF reset is only issued when
+        * the VEB flag had to be set here.
+        */
+       if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) ||
+           (!i40e_is_any_channel(vsi))) {
+               if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Failed to create channel. Override queues (%u) not power of 2\n",
+                               vsi->tc_config.tc_info[0].qcount);
+                       return -EINVAL;
+               }
+ 
+               if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+                       pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+ 
+                       if (vsi->type == I40E_VSI_MAIN) {
+                               if (pf->flags & I40E_FLAG_TC_MQPRIO)
+                                       i40e_do_reset(pf, I40E_PF_RESET_FLAG,
+                                                     true);
+                               else
+                                       i40e_do_reset_safe(pf,
+                                                          I40E_PF_RESET_FLAG);
+                       }
+               }
+               /* now onwards for main VSI, number of queues will be value
+                * of TC0's queue count
+                */
+       }
+ 
+       /* By this time, vsi->cnt_q_avail shall be set to non-zero and
+        * it should be more than num_queues
+        */
+       if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
+               dev_dbg(&pf->pdev->dev,
+                       "Error: cnt_q_avail (%u) less than num_queues %d\n",
+                       vsi->cnt_q_avail, ch->num_queue_pairs);
+               return -EINVAL;
+       }
+ 
+       /* reconfig_rss only if vsi type is MAIN_VSI */
+       if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
+               err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
+               if (err) {
+                       dev_info(&pf->pdev->dev,
+                                "Error: unable to reconfig rss for num_queues (%u)\n",
+                                ch->num_queue_pairs);
+                       return -EINVAL;
+               }
+       }
+ 
+       if (!i40e_setup_channel(pf, vsi, ch)) {
+               dev_info(&pf->pdev->dev, "Failed to setup channel\n");
+               return -EINVAL;
+       }
+ 
+       dev_info(&pf->pdev->dev,
+                "Setup channel (id:%u) utilizing num_queues %d\n",
+                ch->seid, ch->num_queue_pairs);
+ 
+       /* configure VSI for BW limit; 'credits' is computed only for the
+        * debug message, i40e_set_bw_limit() is handed the rate itself.
+        */
+       if (ch->max_tx_rate) {
+               u64 credits = ch->max_tx_rate;
+ 
+               if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
+                       return -EINVAL;
+ 
+               do_div(credits, I40E_BW_CREDIT_DIVISOR);
+               dev_dbg(&pf->pdev->dev,
+                       "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+                       ch->max_tx_rate,
+                       credits,
+                       ch->seid);
+       }
+ 
+       /* in case of VF, this will be main SRIOV VSI */
+       ch->parent_vsi = vsi;
+ 
+       /* and update main_vsi's count for queue_available to use */
+       vsi->cnt_q_avail -= ch->num_queue_pairs;
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_configure_queue_channels - Add queue channel for the given TCs
+  * @vsi: VSI to be configured
+  *
+  * Maps TC0 to the VSI's own seid and then creates one queue channel for
+  * every enabled traffic class above TC0, recording each channel's seid in
+  * vsi->tc_seid_map[]. On any failure i40e_remove_queue_channels() is
+  * called for the VSI before returning.
+  *
+  * Returns 0 on success, -ENOMEM if a channel cannot be allocated, or the
+  * error reported by i40e_create_queue_channel().
+  **/
+ static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
+ {
+       struct i40e_channel *ch;
+       u64 rate_mbps;
+       int ret, tc;
+ 
+       /* Main VSI with TC0 is already set up, only app VSIs are created */
+       vsi->tc_seid_map[0] = vsi->seid;
+ 
+       for (tc = 1; tc < I40E_MAX_TRAFFIC_CLASS; tc++) {
+               if (!(vsi->tc_config.enabled_tc & BIT(tc)))
+                       continue;
+ 
+               ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+               if (!ch) {
+                       ret = -ENOMEM;
+                       goto err_free;
+               }
+ 
+               INIT_LIST_HEAD(&ch->list);
+               ch->base_queue = vsi->tc_config.tc_info[tc].qoffset;
+               ch->num_queue_pairs = vsi->tc_config.tc_info[tc].qcount;
+ 
+               /* Bandwidth limit through tc interface is in bytes/s,
+                * change to Mbit/s
+                */
+               rate_mbps = vsi->mqprio_qopt.max_rate[tc];
+               do_div(rate_mbps, I40E_BW_MBPS_DIVISOR);
+               ch->max_tx_rate = rate_mbps;
+ 
+               /* the channel is linked into ch_list before it is created,
+                * presumably so the error path below can release it
+                */
+               list_add_tail(&ch->list, &vsi->ch_list);
+ 
+               ret = i40e_create_queue_channel(vsi, ch);
+               if (ret) {
+                       dev_err(&vsi->back->pdev->dev,
+                               "Failed creating queue channel with TC%d: queues %d\n",
+                               tc, ch->num_queue_pairs);
+                       goto err_free;
+               }
+               vsi->tc_seid_map[tc] = ch->seid;
+       }
+ 
+       return 0;
+ 
+ err_free:
+       i40e_remove_queue_channels(vsi);
+       return ret;
+ }
+ 
+ /**
+  * i40e_veb_config_tc - Configure TCs for given VEB
+  * @veb: given VEB
+  * @enabled_tc: TC bitmap
+  *
+  * Gives every TC set in @enabled_tc one bandwidth share credit, sends
+  * that configuration for the VEB (switching) element and then refreshes
+  * the BW information through i40e_veb_get_bw_info().
+  *
+  * Returns 0 if @enabled_tc is empty or already active on the VEB, or on
+  * success; otherwise the status of the call that failed.
+  **/
+ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+ {
+       struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
+       struct i40e_pf *pf = veb->pf;
+       int status;
+       int tc;
+ 
+       /* No TCs or already enabled TCs just return */
+       if (!enabled_tc || veb->enabled_tc == enabled_tc)
+               return 0;
+ 
+       /* bw_data.absolute_credits is not set (relative); enable ETS TCs
+        * with equal BW Share for now
+        */
+       bw_data.tc_valid_bits = enabled_tc;
+       for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++)
+               if (enabled_tc & BIT(tc))
+                       bw_data.tc_bw_share_credits[tc] = 1;
+ 
+       status = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
+                                                     &bw_data, NULL);
+       if (status) {
+               dev_info(&pf->pdev->dev,
+                        "VEB bw config failed, err %s aq_err %s\n",
+                        i40e_stat_str(&pf->hw, status),
+                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               return status;
+       }
+ 
+       /* Update the BW information */
+       status = i40e_veb_get_bw_info(veb);
+       if (status)
+               dev_info(&pf->pdev->dev,
+                        "Failed getting veb bw config, err %s aq_err %s\n",
+                        i40e_stat_str(&pf->hw, status),
+                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ 
+       return status;
+ }
+ 
+ #ifdef CONFIG_I40E_DCB
+ /**
+  * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
+  * @pf: PF struct
+  *
+  * Applies the PF's TC map to every allocated VEB and to the LAN VSI; all
+  * other VSIs are kept on the default traffic class. A failure on one
+  * element is only logged and the remaining elements are still processed.
+  * For each VSI that was configured successfully the rings are re-mapped
+  * to vectors and, when a netdev is attached, i40e_dcbnl_set_all() is
+  * called.
+  *
+  * It is assumed that the caller has quiesced all the VSIs before calling
+  * this function.
+  **/
+ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
+ {
+       u8 tc_map = 0;
+       int ret;
+       /* Plain int instead of u8: an 8-bit index wraps at 255 and the VSI
+        * loop below would never terminate if pf->num_alloc_vsi is larger
+        * than that (NOTE(review): num_alloc_vsi is presumably wider than
+        * u8 - confirm against the struct definition).
+        */
+       int v;
+ 
+       /* Enable the TCs available on PF to all VEBs */
+       tc_map = i40e_pf_get_tc_map(pf);
+       for (v = 0; v < I40E_MAX_VEB; v++) {
+               if (!pf->veb[v])
+                       continue;
+               ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Failed configuring TC for VEB seid=%d\n",
+                                pf->veb[v]->seid);
+                       /* Will try to configure as many components */
+               }
+       }
+ 
+       /* Update each VSI */
+       for (v = 0; v < pf->num_alloc_vsi; v++) {
+               if (!pf->vsi[v])
+                       continue;
+ 
+               /* - Enable all TCs for the LAN VSI
+                * - For all others keep them at TC0 for now
+                */
+               if (v == pf->lan_vsi)
+                       tc_map = i40e_pf_get_tc_map(pf);
+               else
+                       tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
+ 
+               ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Failed configuring TC for VSI seid=%d\n",
+                                pf->vsi[v]->seid);
+                       /* Will try to configure as many components */
+               } else {
+                       /* Re-configure VSI vectors based on updated TC map */
+                       i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
+                       if (pf->vsi[v]->netdev)
+                               i40e_dcbnl_set_all(pf->vsi[v]);
+               }
+       }
+ }
+ 
+ /**
+  * i40e_resume_port_tx - Resume port Tx
+  * @pf: PF struct
+  *
+  * Asks the admin queue to resume Tx on the port. If that fails, the
+  * failure is logged, a PF reset is requested and the service task is
+  * scheduled.
+  *
+  * Returns the status of i40e_aq_resume_port_tx() (0 on success).
+  **/
+ static int i40e_resume_port_tx(struct i40e_pf *pf)
+ {
+       int status = i40e_aq_resume_port_tx(&pf->hw, NULL);
+ 
+       if (!status)
+               return 0;
+ 
+       dev_info(&pf->pdev->dev,
+                "Resume Port Tx failed, err %s aq_err %s\n",
+                i40e_stat_str(&pf->hw, status),
+                i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ 
+       /* Schedule PF reset to recover */
+       set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+       i40e_service_event_schedule(pf);
+ 
+       return status;
+ }
+ 
+ /**
+  * i40e_init_pf_dcb - Initialize DCB configuration
+  * @pf: PF being configured
+  *
+  * Query the current DCB configuration and cache it in the hardware
+  * structure. When DCBX is available the PF is flagged DCB capable, and
+  * DCB is flagged enabled only if more than one TC is configured.
+  *
+  * Returns 0 when DCB is not supported by the image or the query
+  * succeeded, otherwise the error from i40e_init_dcb().
+  **/
+ static int i40e_init_pf_dcb(struct i40e_pf *pf)
+ {
+       struct i40e_hw *hw = &pf->hw;
+       int err;
+ 
+       /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
+       if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT)
+               return 0;
+ 
+       /* Get the initial DCB configuration */
+       err = i40e_init_dcb(hw);
+       if (err) {
+               dev_info(&pf->pdev->dev,
+                        "Query for DCB configuration failed, err %s aq_err %s\n",
+                        i40e_stat_str(&pf->hw, err),
+                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               return err;
+       }
+ 
+       /* Device/Function is not DCBX capable */
+       if (!hw->func_caps.dcb ||
+           hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) {
+               dev_info(&pf->pdev->dev,
+                        "DCBX offload is not supported or is disabled for this PF.\n");
+               return 0;
+       }
+ 
+       /* When status is not DISABLED then DCBX in FW */
+       pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE;
+       pf->flags |= I40E_FLAG_DCB_CAPABLE;
+ 
+       /* Enable DCB tagging only when more than one TC
+        * or explicitly disable if only one TC
+        */
+       if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
+               pf->flags |= I40E_FLAG_DCB_ENABLED;
+       else
+               pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ 
+       dev_dbg(&pf->pdev->dev,
+               "DCBX offload is supported for this PF.\n");
+ 
+       return 0;
+ }
+ #endif /* CONFIG_I40E_DCB */
+ #define SPEED_SIZE 14
+ #define FC_SIZE 8
+ /**
+  * i40e_print_link_message - print link up or down
+  * @vsi: the VSI for which link needs a message
+  * @isup: true if the link is up, false if it is down
+  *
+  * Does nothing when both @isup and the current link speed match the
+  * values cached in @vsi from the previous call; otherwise the cache is
+  * updated and a message is logged against the VSI's netdev. The link-up
+  * message carries speed and flow control, plus requested FEC, FEC and
+  * autoneg details on 25G links only.
+  */
+ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+ {
+       enum i40e_aq_link_speed new_speed;
+       struct i40e_pf *pf = vsi->back;
+       char *speed = "Unknown";
+       char *fc = "Unknown";
+       char *fec = "";
+       char *req_fec = "";
+       char *an = "";
+ 
+       new_speed = pf->hw.phy.link_info.link_speed;
+ 
+       if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
+               return;
+       vsi->current_isup = isup;
+       vsi->current_speed = new_speed;
+       if (!isup) {
+               netdev_info(vsi->netdev, "NIC Link is Down\n");
+               return;
+       }
+ 
+       /* Warn user if link speed on NPAR enabled partition is not at
+        * least 10GB
+        */
+       if (pf->hw.func_caps.npar_enable &&
+           (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
+            pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
+               netdev_warn(vsi->netdev,
+                           "The partition detected link speed that is less than 10Gbps\n");
+ 
+       switch (pf->hw.phy.link_info.link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               speed = "40 G";
+               break;
+       case I40E_LINK_SPEED_20GB:
+               speed = "20 G";
+               break;
+       case I40E_LINK_SPEED_25GB:
+               speed = "25 G";
+               break;
+       case I40E_LINK_SPEED_10GB:
+               speed = "10 G";
+               break;
+       case I40E_LINK_SPEED_1GB:
+               speed = "1000 M";
+               break;
+       case I40E_LINK_SPEED_100MB:
+               speed = "100 M";
+               break;
+       default:
+               break;
+       }
+ 
+       switch (pf->hw.fc.current_mode) {
+       case I40E_FC_FULL:
+               fc = "RX/TX";
+               break;
+       case I40E_FC_TX_PAUSE:
+               fc = "TX";
+               break;
+       case I40E_FC_RX_PAUSE:
+               fc = "RX";
+               break;
+       default:
+               fc = "None";
+               break;
+       }
+ 
+       if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
+               req_fec = ", Requested FEC: None";
+               fec = ", FEC: None";
+               an = ", Autoneg: False";
+ 
+               if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
+                       an = ", Autoneg: True";
+ 
+               if (pf->hw.phy.link_info.fec_info &
+                   I40E_AQ_CONFIG_FEC_KR_ENA)
+                       fec = ", FEC: CL74 FC-FEC/BASE-R";
+               else if (pf->hw.phy.link_info.fec_info &
+                        I40E_AQ_CONFIG_FEC_RS_ENA)
+                       fec = ", FEC: CL108 RS-FEC";
+ 
+               /* 'CL108 RS-FEC' should be displayed when RS is requested, or
+                * both RS and FC are requested; 'CL74 FC-FEC/BASE-R' only
+                * when KR alone is requested
+                */
+               if (vsi->back->hw.phy.link_info.req_fec_info &
+                   (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
+                       if (vsi->back->hw.phy.link_info.req_fec_info &
+                           I40E_AQ_REQUEST_FEC_RS)
+                               req_fec = ", Requested FEC: CL108 RS-FEC";
+                       else
+                               req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+               }
+       }
+ 
+       netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
+                   speed, req_fec, fec, an, fc);
+ }
+ 
+ /**
+  * i40e_up_complete - Finish the last steps of bringing up a connection
+  * @vsi: the VSI being configured
+  *
+  * Sets up interrupts, starts the rings, clears the VSI's DOWN state bit
+  * and enables NAPI and IRQs. If the link is already up and the VSI has a
+  * netdev, the Tx queues and carrier are switched on as well. For the
+  * FDIR VSI the sideband filters are replayed. Finally the service task is
+  * scheduled with a client notification request.
+  *
+  * Returns 0 on success or the error from i40e_vsi_start_rings().
+  **/
+ static int i40e_up_complete(struct i40e_vsi *vsi)
+ {
+       struct i40e_pf *pf = vsi->back;
+       int err;
+ 
+       if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+               i40e_configure_msi_and_legacy(vsi);
+       else
+               i40e_vsi_configure_msix(vsi);
+ 
+       /* start rings */
+       err = i40e_vsi_start_rings(vsi);
+       if (err)
+               return err;
+ 
+       clear_bit(__I40E_VSI_DOWN, vsi->state);
+       i40e_napi_enable_all(vsi);
+       i40e_vsi_enable_irq(vsi);
+ 
+       if (vsi->netdev &&
+           (pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP)) {
+               i40e_print_link_message(vsi, true);
+               netif_tx_start_all_queues(vsi->netdev);
+               netif_carrier_on(vsi->netdev);
+       }
+ 
+       /* replay FDIR SB filters, starting from clean fd counters */
+       if (vsi->type == I40E_VSI_FDIR) {
+               pf->fd_add_err = 0;
+               pf->fd_atr_cnt = 0;
+               i40e_fdir_filter_restore(vsi);
+       }
+ 
+       /* On the next run of the service_task, notify any clients of the new
+        * opened netdev
+        */
+       pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+       i40e_service_event_schedule(pf);
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_vsi_reinit_locked - Reset the VSI
+  * @vsi: the VSI being configured
+  *
+  * Rebuild the ring structs after some configuration has changed, e.g.
+  * MTU size. The VSI is taken down and brought back up while the PF's
+  * CONFIG_BUSY state bit is held. Warns if called from interrupt context.
+  **/
+ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+ {
+       struct i40e_pf *pf = vsi->back;
+ 
+       WARN_ON(in_interrupt());
+ 
+       /* retry, sleeping between attempts, until CONFIG_BUSY is ours */
+       for (;;) {
+               if (!test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
+                       break;
+               usleep_range(1000, 2000);
+       }
+ 
+       i40e_down(vsi);
+       i40e_up(vsi);
+ 
+       clear_bit(__I40E_CONFIG_BUSY, pf->state);
+ }
+ 
+ /**
+  * i40e_up - Bring the connection back up after being down
+  * @vsi: the VSI being configured
+  *
+  * Configures the VSI and, only if that succeeded, completes the bring-up.
+  *
+  * Returns 0 on success or the error of whichever step failed.
+  **/
+ int i40e_up(struct i40e_vsi *vsi)
+ {
+       int err = i40e_vsi_configure(vsi);
+ 
+       if (err)
+               return err;
+ 
+       return i40e_up_complete(vsi);
+ }
+ 
+ /**
+  * i40e_down - Shutdown the connection processing
+  * @vsi: the VSI being stopped
+  *
+  * Turns off carrier and Tx on the netdev (if any), disables IRQs, stops
+  * the rings, disables NAPI and then cleans every Tx, XDP Tx (when XDP is
+  * enabled on the VSI) and Rx ring.
+  *
+  * It is assumed that the caller of this function sets the vsi->state
+  * __I40E_VSI_DOWN bit.
+  **/
+ void i40e_down(struct i40e_vsi *vsi)
+ {
+       struct net_device *netdev = vsi->netdev;
+       int q;
+ 
+       if (netdev) {
+               netif_carrier_off(netdev);
+               netif_tx_disable(netdev);
+       }
+ 
+       i40e_vsi_disable_irq(vsi);
+       i40e_vsi_stop_rings(vsi);
+       i40e_napi_disable_all(vsi);
+ 
+       for (q = 0; q < vsi->num_queue_pairs; q++) {
+               i40e_clean_tx_ring(vsi->tx_rings[q]);
+               if (i40e_enabled_xdp_vsi(vsi))
+                       i40e_clean_tx_ring(vsi->xdp_rings[q]);
+               i40e_clean_rx_ring(vsi->rx_rings[q]);
+       }
+ }
+ 
+ /**
+  * i40e_validate_mqprio_qopt - validate queue mapping info
+  * @vsi: the VSI being configured
+  * @mqprio_qopt: queue parameters
+  *
+  * Checks that the TC count is between 1 and I40E_MAX_TRAFFIC_CLASS, that
+  * TC0 starts at queue 0, that every TC has at least one queue and no min
+  * rate, that the TC queue ranges are contiguous and fit into the VSI's
+  * queue pairs, and that the summed max rates do not exceed the value
+  * returned by i40e_get_link_speed().
+  *
+  * Returns 0 if the options are acceptable, -EINVAL otherwise.
+  **/
+ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
+                                    struct tc_mqprio_qopt_offload *mqprio_qopt)
+ {
+       int num_tc = mqprio_qopt->qopt.num_tc;
+       u64 total_rate = 0;
+       u64 rate;
+       int last;
+       int tc;
+ 
+       if (mqprio_qopt->qopt.offset[0] != 0)
+               return -EINVAL;
+       if (num_tc < 1 || num_tc > I40E_MAX_TRAFFIC_CLASS)
+               return -EINVAL;
+ 
+       for (tc = 0; tc < num_tc; tc++) {
+               if (!mqprio_qopt->qopt.count[tc])
+                       return -EINVAL;
+               if (mqprio_qopt->min_rate[tc]) {
+                       dev_err(&vsi->back->pdev->dev,
+                               "Invalid min tx rate (greater than 0) specified\n");
+                       return -EINVAL;
+               }
+ 
+               rate = mqprio_qopt->max_rate[tc];
+               do_div(rate, I40E_BW_MBPS_DIVISOR);
+               total_rate += rate;
+ 
+               /* the next TC has to start right where this one ends */
+               if (tc + 1 < num_tc &&
+                   mqprio_qopt->qopt.offset[tc + 1] !=
+                   (mqprio_qopt->qopt.offset[tc] + mqprio_qopt->qopt.count[tc]))
+                       return -EINVAL;
+       }
+ 
+       /* the last TC must still end inside the VSI's queue pairs */
+       last = num_tc - 1;
+       if (vsi->num_queue_pairs <
+           (mqprio_qopt->qopt.offset[last] + mqprio_qopt->qopt.count[last]))
+               return -EINVAL;
+ 
+       if (total_rate > i40e_get_link_speed(vsi)) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Invalid max tx rate specified\n");
+               return -EINVAL;
+       }
+ 
+       return 0;
+ }
+ 
+ /**
+  * i40e_vsi_set_default_tc_config - set default values for tc configuration
+  * @vsi: the VSI being configured
+  *
+  * Leaves only TC0 enabled. TC0 gets the smaller of the VSI's allocated
+  * queue pairs and the PF's per-TC queue limit; every other TC gets a
+  * single queue. All TCs start at queue offset 0 and map to netdev TC 0.
+  **/
+ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
+ {
+       u16 tc0_queues;
+       int tc;
+ 
+       /* Only TC0 is enabled */
+       vsi->tc_config.numtc = 1;
+       vsi->tc_config.enabled_tc = 1;
+ 
+       tc0_queues = min_t(int, vsi->alloc_queue_pairs,
+                          i40e_pf_get_max_q_per_tc(vsi->back));
+ 
+       /* For the TC that is not enabled set the offset to default
+        * queue and allocate one queue for the given TC.
+        */
+       for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++) {
+               vsi->tc_config.tc_info[tc].qoffset = 0;
+               vsi->tc_config.tc_info[tc].qcount = tc ? 1 : tc0_queues;
+               vsi->tc_config.tc_info[tc].netdev_tc = 0;
+       }
+ }
+ 
+ /**
+  * i40e_setup_tc - configure multiple traffic classes
+  * @netdev: net device to configure
+  * @type_data: tc offload data
+  *
+  * @type_data is treated as a struct tc_mqprio_qopt_offload. Without hw
+  * offload the TC_MQPRIO flag is cleared and the options are stored as is.
+  * With hw offload the request is rejected in MFP mode; DCB mode needs DCB
+  * enabled and a TC count within i40e_pf_get_num_tc(); channel mode needs
+  * DCB disabled, MSI-X enabled and options that pass
+  * i40e_validate_mqprio_qopt(). The VSI is quiesced while it is
+  * reconfigured and unquiesced again before returning; if reconfiguring
+  * fails the default (TC0 only) configuration is restored.
+  *
+  * Returns 0 on success or when the requested TC map is already active
+  * (non-channel modes only), -EINVAL for rejected requests, otherwise the
+  * error of the step that failed.
+  **/
+ static int i40e_setup_tc(struct net_device *netdev, void *type_data)
+ {
+       struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       u8 enabled_tc = 0, num_tc, hw;
+       bool need_reset = false;
+       int ret = -EINVAL;
+       u16 mode;
+       int i;
+ 
+       num_tc = mqprio_qopt->qopt.num_tc;
+       hw = mqprio_qopt->qopt.hw;
+       mode = mqprio_qopt->mode;
+       if (!hw) {
+               pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+               memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+               goto config_tc;
+       }
+ 
+       /* Check if MFP enabled */
+       if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+               netdev_info(netdev,
+                           "Configuring TC not supported in MFP mode\n");
+               return ret;
+       }
+       switch (mode) {
+       case TC_MQPRIO_MODE_DCB:
+               pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+ 
+               /* Check if DCB enabled to continue */
+               if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+                       netdev_info(netdev,
+                                   "DCB is not enabled for adapter\n");
+                       return ret;
+               }
+ 
+               /* Check whether tc count is within enabled limit */
+               if (num_tc > i40e_pf_get_num_tc(pf)) {
+                       netdev_info(netdev,
+                                   "TC count greater than enabled on link for adapter\n");
+                       return ret;
+               }
+               break;
+       case TC_MQPRIO_MODE_CHANNEL:
+               if (pf->flags & I40E_FLAG_DCB_ENABLED) {
+                       netdev_info(netdev,
+                                   "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
+                       return ret;
+               }
+               if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+                       return ret;
+               ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
+               if (ret)
+                       return ret;
+               memcpy(&vsi->mqprio_qopt, mqprio_qopt,
+                      sizeof(*mqprio_qopt));
+               pf->flags |= I40E_FLAG_TC_MQPRIO;
+               pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+               break;
+       default:
+               return -EINVAL;
+       }
+ 
+ config_tc:
+       /* Generate TC map for number of tc requested */
+       for (i = 0; i < num_tc; i++)
+               enabled_tc |= BIT(i);
+ 
+       /* Requesting same TC configuration as already enabled: nothing to
+        * do, except in channel mode which is always applied again
+        */
+       if (enabled_tc == vsi->tc_config.enabled_tc &&
+           mode != TC_MQPRIO_MODE_CHANNEL)
+               return 0;
+ 
+       /* Quiesce VSI queues */
+       i40e_quiesce_vsi(vsi);
+ 
+       if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO))
+               i40e_remove_queue_channels(vsi);
+ 
+       /* Configure VSI for enabled TCs */
+       ret = i40e_vsi_config_tc(vsi, enabled_tc);
+       if (ret) {
+               netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
+                           vsi->seid);
+               need_reset = true;
+               goto exit;
+       }
+ 
+       if (pf->flags & I40E_FLAG_TC_MQPRIO) {
+               if (vsi->mqprio_qopt.max_rate[0]) {
+                       u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+ 
+                       do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+                       ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+                       if (!ret) {
+                               u64 credits = max_tx_rate;
+ 
+                               do_div(credits, I40E_BW_CREDIT_DIVISOR);
+                               dev_dbg(&vsi->back->pdev->dev,
+                                       "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+                                       max_tx_rate,
+                                       credits,
+                                       vsi->seid);
+                       } else {
+                               need_reset = true;
+                               goto exit;
+                       }
+               }
+               ret = i40e_configure_queue_channels(vsi);
                 if (ret) {
-                       dev_info(&pf->pdev->dev,
-                                "Failed configuring TC for VEB seid=%d\n",
-                                pf->veb[v]->seid);
-                       /* Will try to configure as many components */
+                       netdev_info(netdev,
+                                   "Failed configuring queue channels\n");
+                       need_reset = true;
+                       goto exit;
                 }
         }
   
-       /* Update each VSI */
-       for (v = 0; v < pf->num_alloc_vsi; v++) {
-               if (!pf->vsi[v])
-                       continue;
+ exit:
+       /* Reset the configuration data to defaults, only TC0 is enabled */
+       if (need_reset) {
+               i40e_vsi_set_default_tc_config(vsi);
+               need_reset = false;
+       }
   
-               /* - Enable all TCs for the LAN VSI
-                * - For all others keep them at TC0 for now
-                */
-               if (v == pf->lan_vsi)
-                       tc_map = i40e_pf_get_tc_map(pf);
-               else
-                       tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
+       /* Unquiesce VSI */
+       i40e_unquiesce_vsi(vsi);
+       return ret;
+ }
   
-               ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
-               if (ret) {
-                       dev_info(&pf->pdev->dev,
-                                "Failed configuring TC for VSI seid=%d\n",
-                                pf->vsi[v]->seid);
-                       /* Will try to configure as many components */
-               } else {
-                       /* Re-configure VSI vectors based on updated TC map */
-                       i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
-                       if (pf->vsi[v]->netdev)
-                               i40e_dcbnl_set_all(pf->vsi[v]);
+ /**
+  * i40e_set_cld_element - sets cloud filter element data
+  * @filter: cloud filter rule
+  * @cld: ptr to cloud filter element data
+  *
+  * This is helper function to copy data into cloud filter element.
+  * @cld is zeroed first; outer_mac is taken from filter->dst_mac and
+  * inner_mac from filter->src_mac. Nothing else is filled in unless
+  * filter->n_proto is ETH_P_IP or ETH_P_IPV6, in which case the
+  * destination IP address and the inner VLAN are copied as well. For IPv6
+  * the 32-bit words of dst_ipv6 are written in reverse order.
+  *
+  * NOTE(review): the IPv6 path passes each word through cpu_to_le32()
+  * before copying it, while the IPv4 path copies the be32_to_cpu() result
+  * directly - confirm this is intended for big-endian hosts.
+  **/
+ static inline void
+ i40e_set_cld_element(struct i40e_cloud_filter *filter,
+                    struct i40e_aqc_cloud_filters_element_data *cld)
+ {
+       int i, j;
+       u32 ipa;
+ 
+       memset(cld, 0, sizeof(*cld));
+       ether_addr_copy(cld->outer_mac, filter->dst_mac);
+       ether_addr_copy(cld->inner_mac, filter->src_mac);
+ 
+       if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6)
+               return;
+ 
+       if (filter->n_proto == ETH_P_IPV6) {
+ #define IPV6_MAX_INDEX        (ARRAY_SIZE(filter->dst_ipv6) - 1)
+               for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6);
+                    i++, j += 2) {
+                       ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
+                       ipa = cpu_to_le32(ipa);
+                       memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa));
                 }
+       } else {
+               ipa = be32_to_cpu(filter->dst_ipv4);
+               memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
         }
+ 
+       cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id));
+ 
+       /* tenant_id is not supported by FW now, once the support is enabled
+        * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id).
+        * Until then the check below has no effect: the function returns
+        * right after it either way.
+        */
+       if (filter->tenant_id)
+               return;
   }
   
   /**
-  * i40e_resume_port_tx - Resume port Tx
-  * @pf: PF struct
+  * i40e_add_del_cloud_filter - Add/del cloud filter
+  * @vsi: pointer to VSI
+  * @filter: cloud filter rule
+  * @add: if true, add, if false, delete
    *
-  * Resume a port's Tx and issue a PF reset in case of failure to
-  * resume.
+  * Add or delete a cloud filter for a specific flow spec.
+  * The element is built with i40e_set_cld_element(); its flags combine the
+  * shifted tunnel type (if any), the flag_table entry selected by
+  * filter->flags and the IPv4 or IPv6 marker. The outcome is logged.
+  *
+  * Returns 0 if the filter was successfully added or deleted,
+  * I40E_ERR_CONFIG if filter->flags is outside flag_table, otherwise the
+  * status of the admin queue add/remove call.
    **/
- static int i40e_resume_port_tx(struct i40e_pf *pf)
+ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+                                    struct i40e_cloud_filter *filter, bool add)
   {
-       struct i40e_hw *hw = &pf->hw;
+       struct i40e_aqc_cloud_filters_element_data cld_filter;
+       struct i40e_pf *pf = vsi->back;
         int ret;
+       static const u16 flag_table[128] = {
+               [I40E_CLOUD_FILTER_FLAGS_OMAC]  =
+                       I40E_AQC_ADD_CLOUD_FILTER_OMAC,
+               [I40E_CLOUD_FILTER_FLAGS_IMAC]  =
+                       I40E_AQC_ADD_CLOUD_FILTER_IMAC,
+               [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN]  =
+                       I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
+               [I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
+                       I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
+               [I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
+                       I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
+               [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
+                       I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
+               [I40E_CLOUD_FILTER_FLAGS_IIP] =
+                       I40E_AQC_ADD_CLOUD_FILTER_IIP,
+       };
+ 
+       if (filter->flags >= ARRAY_SIZE(flag_table))
+               return I40E_ERR_CONFIG;
+ 
+       /* copy element needed to add cloud filter from filter; this also
+        * zeroes cld_filter, so the flag bits OR-ed in below start clean
+        */
+       i40e_set_cld_element(filter, &cld_filter);
+ 
+       if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
+               cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
+                                            I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
+ 
+       if (filter->n_proto == ETH_P_IPV6)
+               cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+                                               I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+       else
+               cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+                                               I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
   
-       ret = i40e_aq_resume_port_tx(hw, NULL);
-       if (ret) {
+       if (add)
+               ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
+                                               &cld_filter, 1);
+       else
+               ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
+                                               &cld_filter, 1);
+       if (ret)
+               dev_dbg(&pf->pdev->dev,
+                       "Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
+                       add ? "add" : "delete", filter->dst_port, ret,
+                       pf->hw.aq.asq_last_status);
+       else
                 dev_info(&pf->pdev->dev,
-                        "Resume Port Tx failed, err %s aq_err %s\n",
-                         i40e_stat_str(&pf->hw, ret),
-                         i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-               /* Schedule PF reset to recover */
-               set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
-               i40e_service_event_schedule(pf);
-       }
- 
+                        "%s cloud filter for VSI: %d\n",
+                        add ? "Added" : "Deleted", filter->seid);
         return ret;
   }
   
   /**
-  * i40e_init_pf_dcb - Initialize DCB configuration
-  * @pf: PF being configured
+  * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf
+  * @vsi: pointer to VSI
+  * @filter: cloud filter rule
+  * @add: if true, add, if false, delete
    *
-  * Query the current DCB configuration and cache it
-  * in the hardware structure
+  * Add or delete a cloud filter for a specific flow spec using big buffer.
+  * Returns 0 if the filter was successfully added.
    **/
- static int i40e_init_pf_dcb(struct i40e_pf *pf)
+ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+                                            struct i40e_cloud_filter *filter,
+                                            bool add)
   {
-       struct i40e_hw *hw = &pf->hw;
-       int err = 0;
+       struct i40e_aqc_cloud_filters_element_bb cld_filter;
+       struct i40e_pf *pf = vsi->back;
+       int ret;
   
-       /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
-       if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT)
-               goto out;
+       /* Both (src/dst) valid mac_addr are not supported */
+       if ((is_valid_ether_addr(filter->dst_mac) &&
+            is_valid_ether_addr(filter->src_mac)) ||
+           (is_multicast_ether_addr(filter->dst_mac) &&
+            is_multicast_ether_addr(filter->src_mac)))
+               return -EINVAL;
   
-       /* Get the initial DCB configuration */
-       err = i40e_init_dcb(hw);
-       if (!err) {
-               /* Device/Function is not DCBX capable */
-               if ((!hw->func_caps.dcb) ||
-                   (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
-                       dev_info(&pf->pdev->dev,
-                                "DCBX offload is not supported or is disabled for this PF.\n");
-               } else {
-                       /* When status is not DISABLED then DCBX in FW */
-                       pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
-                                      DCB_CAP_DCBX_VER_IEEE;
+       /* Make sure a port is specified, otherwise bail out: a channel
+        * specific cloud filter needs the 'L4 port' to be non-zero
+        */
+       if (!filter->dst_port)
+               return -EINVAL;
   
-                       pf->flags |= I40E_FLAG_DCB_CAPABLE;
-                       /* Enable DCB tagging only when more than one TC
-                        * or explicitly disable if only one TC
-                        */
-                       if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
-                               pf->flags |= I40E_FLAG_DCB_ENABLED;
-                       else
-                               pf->flags &= ~I40E_FLAG_DCB_ENABLED;
-                       dev_dbg(&pf->pdev->dev,
-                               "DCBX offload is supported for this PF.\n");
+       /* adding filter using src_port/src_ip is not supported at this stage */
+       if (filter->src_port || filter->src_ipv4 ||
+           !ipv6_addr_any(&filter->ip.v6.src_ip6))
+               return -EINVAL;
+ 
+       /* copy element needed to add cloud filter from filter */
+       i40e_set_cld_element(filter, &cld_filter.element);
+ 
+       if (is_valid_ether_addr(filter->dst_mac) ||
+           is_valid_ether_addr(filter->src_mac) ||
+           is_multicast_ether_addr(filter->dst_mac) ||
+           is_multicast_ether_addr(filter->src_mac)) {
+               /* MAC + IP : unsupported mode */
+               if (filter->dst_ipv4)
+                       return -EINVAL;
+ 
+               /* since we validated that L4 port must be valid before
+                * we get here, start with respective "flags" value
+                * and update if vlan is present or not
+                */
+               cld_filter.element.flags =
+                       cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT);
+ 
+               if (filter->vlan_id) {
+                       cld_filter.element.flags =
+                       cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
                 }
+ 
+       } else if (filter->dst_ipv4 ||
+                  !ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
+               cld_filter.element.flags =
+                               cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT);
+               if (filter->n_proto == ETH_P_IPV6)
+                       cld_filter.element.flags |=
+                               cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+               else
+                       cld_filter.element.flags |=
+                               cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
         } else {
-               dev_info(&pf->pdev->dev,
-                        "Query for DCB configuration failed, err %s aq_err %s\n",
-                        i40e_stat_str(&pf->hw, err),
-                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               dev_err(&pf->pdev->dev,
+                       "either mac or ip has to be valid for cloud filter\n");
+               return -EINVAL;
         }
   
- out:
-       return err;
+       /* Now copy L4 port in Byte 6..7 in general fields */
+       cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
+                                               be16_to_cpu(filter->dst_port);
+ 
+       if (add) {
+               /* Validate current device switch mode, change if necessary */
+               ret = i40e_validate_and_set_switch_mode(vsi);
+               if (ret) {
+                       dev_err(&pf->pdev->dev,
+                               "failed to set switch mode, ret %d\n",
+                               ret);
+                       return ret;
+               }
+ 
+               ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid,
+                                                  &cld_filter, 1);
+       } else {
+               ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid,
+                                                  &cld_filter, 1);
+       }
+ 
+       if (ret)
+               dev_dbg(&pf->pdev->dev,
+                       "Failed to %s cloud filter(big buffer) err %d aq_err %d\n",
+                       add ? "add" : "delete", ret, pf->hw.aq.asq_last_status);
+       else
+               dev_info(&pf->pdev->dev,
+                        "%s cloud filter for VSI: %d, L4 port: %d\n",
+                        add ? "add" : "delete", filter->seid,
+                        ntohs(filter->dst_port));
+       return ret;
   }
- #endif /* CONFIG_I40E_DCB */
- #define SPEED_SIZE 14
- #define FC_SIZE 8
+ 
   /**
-  * i40e_print_link_message - print link up or down
-  * @vsi: the VSI for which link needs a message
-  */
- void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+  * i40e_parse_cls_flower - Parse tc flower filters provided by kernel
+  * @vsi: Pointer to VSI
+  * @f: Pointer to struct tc_cls_flower_offload
+  * @filter: Pointer to cloud filter structure
+  *
+  **/
+ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
+                                struct tc_cls_flower_offload *f,
+                                struct i40e_cloud_filter *filter)
   {
-       enum i40e_aq_link_speed new_speed;
-       char *speed = "Unknown";
-       char *fc = "Unknown";
-       char *fec = "";
-       char *req_fec = "";
-       char *an = "";
+       u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
+       struct i40e_pf *pf = vsi->back;
+       u8 field_flags = 0;
+ 
+       if (f->dissector->used_keys &
+           ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+             BIT(FLOW_DISSECTOR_KEY_BASIC) |
+             BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_VLAN) |
+             BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_PORTS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+               dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n",
+                       f->dissector->used_keys);
+               return -EOPNOTSUPP;
+       }
   
-       new_speed = vsi->back->hw.phy.link_info.link_speed;
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+               struct flow_dissector_key_keyid *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                                 f->key);
   
-       if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
-               return;
-       vsi->current_isup = isup;
-       vsi->current_speed = new_speed;
-       if (!isup) {
-               netdev_info(vsi->netdev, "NIC Link is Down\n");
-               return;
+               struct flow_dissector_key_keyid *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                                 f->mask);
+ 
+               if (mask->keyid != 0)
+                       field_flags |= I40E_CLOUD_FIELD_TEN_ID;
+ 
+               filter->tenant_id = be32_to_cpu(key->keyid);
         }
   
-       /* Warn user if link speed on NPAR enabled partition is not at
-        * least 10GB
-        */
-       if (vsi->back->hw.func_caps.npar_enable &&
-           (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
-            vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
-               netdev_warn(vsi->netdev,
-                           "The partition detected link speed that is less than 10Gbps\n");
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               struct flow_dissector_key_basic *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->key);
   
-       switch (vsi->back->hw.phy.link_info.link_speed) {
-       case I40E_LINK_SPEED_40GB:
-               speed = "40 G";
-               break;
-       case I40E_LINK_SPEED_20GB:
-               speed = "20 G";
-               break;
-       case I40E_LINK_SPEED_25GB:
-               speed = "25 G";
-               break;
-       case I40E_LINK_SPEED_10GB:
-               speed = "10 G";
-               break;
-       case I40E_LINK_SPEED_1GB:
-               speed = "1000 M";
-               break;
-       case I40E_LINK_SPEED_100MB:
-               speed = "100 M";
-               break;
-       default:
-               break;
+               struct flow_dissector_key_basic *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->mask);
+ 
+               n_proto_key = ntohs(key->n_proto);
+               n_proto_mask = ntohs(mask->n_proto);
+ 
+               if (n_proto_key == ETH_P_ALL) {
+                       n_proto_key = 0;
+                       n_proto_mask = 0;
+               }
+               filter->n_proto = n_proto_key & n_proto_mask;
+               filter->ip_proto = key->ip_proto;
         }
   
-       switch (vsi->back->hw.fc.current_mode) {
-       case I40E_FC_FULL:
-               fc = "RX/TX";
-               break;
-       case I40E_FC_TX_PAUSE:
-               fc = "TX";
-               break;
-       case I40E_FC_RX_PAUSE:
-               fc = "RX";
-               break;
-       default:
-               fc = "None";
-               break;
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+               struct flow_dissector_key_eth_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                                 f->key);
+ 
+               struct flow_dissector_key_eth_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                                 f->mask);
+ 
+               /* use is_broadcast and is_zero to check for all 0xf or 0 */
+               if (!is_zero_ether_addr(mask->dst)) {
+                       if (is_broadcast_ether_addr(mask->dst)) {
+                               field_flags |= I40E_CLOUD_FIELD_OMAC;
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
+                                       mask->dst);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+ 
+               if (!is_zero_ether_addr(mask->src)) {
+                       if (is_broadcast_ether_addr(mask->src)) {
+                               field_flags |= I40E_CLOUD_FIELD_IMAC;
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
+                                       mask->src);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               ether_addr_copy(filter->dst_mac, key->dst);
+               ether_addr_copy(filter->src_mac, key->src);
         }
   
-       if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
-               req_fec = ", Requested FEC: None";
-               fec = ", FEC: None";
-               an = ", Autoneg: False";
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+               struct flow_dissector_key_vlan *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->key);
+               struct flow_dissector_key_vlan *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->mask);
   
-               if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
-                       an = ", Autoneg: True";
+               if (mask->vlan_id) {
+                       if (mask->vlan_id == VLAN_VID_MASK) {
+                               field_flags |= I40E_CLOUD_FIELD_IVLAN;
   
-               if (vsi->back->hw.phy.link_info.fec_info &
-                   I40E_AQ_CONFIG_FEC_KR_ENA)
-                       fec = ", FEC: CL74 FC-FEC/BASE-R";
-               else if (vsi->back->hw.phy.link_info.fec_info &
-                        I40E_AQ_CONFIG_FEC_RS_ENA)
-                       fec = ", FEC: CL108 RS-FEC";
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
+                                       mask->vlan_id);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
   
-               /* 'CL108 RS-FEC' should be displayed when RS is requested, or
-                * both RS and FC are requested
+               filter->vlan_id = cpu_to_be16(key->vlan_id);
+       }
+ 
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+               struct flow_dissector_key_control *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_CONTROL,
+                                                 f->key);
+ 
+               addr_type = key->addr_type;
+       }
+ 
+       if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+               struct flow_dissector_key_ipv4_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_ipv4_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+                                                 f->mask);
+ 
+               if (mask->dst) {
+                       if (mask->dst == cpu_to_be32(0xffffffff)) {
+                               field_flags |= I40E_CLOUD_FIELD_IIP;
+                       } else {
+                               mask->dst = be32_to_cpu(mask->dst);
+                               dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n",
+                                       &mask->dst);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+ 
+               if (mask->src) {
+                       if (mask->src == cpu_to_be32(0xffffffff)) {
+                               field_flags |= I40E_CLOUD_FIELD_IIP;
+                       } else {
+                               mask->src = be32_to_cpu(mask->src);
+                               dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n",
+                                       &mask->src);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+ 
+               if (field_flags & I40E_CLOUD_FIELD_TEN_ID) {
+                       dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
+                       return I40E_ERR_CONFIG;
+               }
+               filter->dst_ipv4 = key->dst;
+               filter->src_ipv4 = key->src;
+       }
+ 
+       if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+               struct flow_dissector_key_ipv6_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_ipv6_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+                                                 f->mask);
+ 
+               /* src and dest IPV6 address should not be LOOPBACK
+                * (0:0:0:0:0:0:0:1), which can be represented as ::1
                  */
-               if (vsi->back->hw.phy.link_info.req_fec_info &
-                   (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
-                       if (vsi->back->hw.phy.link_info.req_fec_info &
-                           I40E_AQ_REQUEST_FEC_RS)
-                               req_fec = ", Requested FEC: CL108 RS-FEC";
-                       else
-                               req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+               if (ipv6_addr_loopback(&key->dst) ||
+                   ipv6_addr_loopback(&key->src)) {
+                       dev_err(&pf->pdev->dev,
+                               "Bad ipv6, addr is LOOPBACK\n");
+                       return I40E_ERR_CONFIG;
+               }
+               if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+                       field_flags |= I40E_CLOUD_FIELD_IIP;
+ 
+               memcpy(&filter->src_ipv6, &key->src.s6_addr32,
+                      sizeof(filter->src_ipv6));
+               memcpy(&filter->dst_ipv6, &key->dst.s6_addr32,
+                      sizeof(filter->dst_ipv6));
+       }
+ 
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+               struct flow_dissector_key_ports *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_PORTS,
+                                                 f->key);
+               struct flow_dissector_key_ports *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_PORTS,
+                                                 f->mask);
+ 
+               if (mask->src) {
+                       if (mask->src == cpu_to_be16(0xffff)) {
+                               field_flags |= I40E_CLOUD_FIELD_IIP;
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
+                                       be16_to_cpu(mask->src));
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+ 
+               if (mask->dst) {
+                       if (mask->dst == cpu_to_be16(0xffff)) {
+                               field_flags |= I40E_CLOUD_FIELD_IIP;
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
+                                       be16_to_cpu(mask->dst));
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+ 
+               filter->dst_port = key->dst;
+               filter->src_port = key->src;
+ 
+               switch (filter->ip_proto) {
+               case IPPROTO_TCP:
+               case IPPROTO_UDP:
+                       break;
+               default:
+                       dev_err(&pf->pdev->dev,
+                               "Only UDP and TCP transport are supported\n");
+                       return -EINVAL;
                 }
         }
+       filter->flags = field_flags;
+       return 0;
+ }
   
-       netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
-                   speed, req_fec, fec, an, fc);
+ /**
+  * i40e_handle_tclass: Forward to a traffic class on the device
+  * @vsi: Pointer to VSI
+  * @tc: traffic class index on the device
+  * @filter: Pointer to cloud filter structure
+  *
+  **/
+ static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc,
+                             struct i40e_cloud_filter *filter)
+ {
+       struct i40e_channel *ch, *ch_tmp;
+ 
+       /* direct to a traffic class on the same device */
+       if (tc == 0) {
+               filter->seid = vsi->seid;
+               return 0;
+       } else if (vsi->tc_config.enabled_tc & BIT(tc)) {
+               if (!filter->dst_port) {
+                       dev_err(&vsi->back->pdev->dev,
+                               "Specify destination port to direct to traffic class that is not default\n");
+                       return -EINVAL;
+               }
+               if (list_empty(&vsi->ch_list))
+                       return -EINVAL;
+               list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list,
+                                        list) {
+                       if (ch->seid == vsi->tc_seid_map[tc])
+                               filter->seid = ch->seid;
+               }
+               return 0;
+       }
+       dev_err(&vsi->back->pdev->dev, "TC is not enabled\n");
+       return -EINVAL;
   }
   
   /**
-  * i40e_up_complete - Finish the last steps of bringing up a connection
-  * @vsi: the VSI being configured
+  * i40e_configure_clsflower - Configure tc flower filters
+  * @vsi: Pointer to VSI
+  * @cls_flower: Pointer to struct tc_cls_flower_offload
+  *
    **/
- static int i40e_up_complete(struct i40e_vsi *vsi)
+ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
+                                   struct tc_cls_flower_offload *cls_flower)
   {
+       int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
+       struct i40e_cloud_filter *filter = NULL;
         struct i40e_pf *pf = vsi->back;
-       int err;
+       int err = 0;
   
-       if (pf->flags & I40E_FLAG_MSIX_ENABLED)
-               i40e_vsi_configure_msix(vsi);
-       else
-               i40e_configure_msi_and_legacy(vsi);
+       if (tc < 0) {
+               dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
+               return -EINVAL;
+       }
   
-       /* start rings */
-       err = i40e_vsi_start_rings(vsi);
-       if (err)
-               return err;
+       if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+           test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+               return -EBUSY;
   
-       clear_bit(__I40E_VSI_DOWN, vsi->state);
-       i40e_napi_enable_all(vsi);
-       i40e_vsi_enable_irq(vsi);
+       if (pf->fdir_pf_active_filters ||
+           (!hlist_empty(&pf->fdir_filter_list))) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n");
+               return -EINVAL;
+       }
   
-       if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
-           (vsi->netdev)) {
-               i40e_print_link_message(vsi, true);
-               netif_tx_start_all_queues(vsi->netdev);
-               netif_carrier_on(vsi->netdev);
-       } else if (vsi->netdev) {
-               i40e_print_link_message(vsi, false);
-               /* need to check for qualified module here*/
-               if ((pf->hw.phy.link_info.link_info &
-                       I40E_AQ_MEDIA_AVAILABLE) &&
-                   (!(pf->hw.phy.link_info.an_info &
-                       I40E_AQ_QUALIFIED_MODULE)))
-                       netdev_err(vsi->netdev,
-                                  "the driver failed to link because an unqualified module was detected.");
+       if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
+               vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+               vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
         }
   
-       /* replay FDIR SB filters */
-       if (vsi->type == I40E_VSI_FDIR) {
-               /* reset fd counters */
-               pf->fd_add_err = 0;
-               pf->fd_atr_cnt = 0;
-               i40e_fdir_filter_restore(vsi);
+       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       if (!filter)
+               return -ENOMEM;
+ 
+       filter->cookie = cls_flower->cookie;
+ 
+       err = i40e_parse_cls_flower(vsi, cls_flower, filter);
+       if (err < 0)
+               goto err;
+ 
+       err = i40e_handle_tclass(vsi, tc, filter);
+       if (err < 0)
+               goto err;
+ 
+       /* Add cloud filter */
+       if (filter->dst_port)
+               err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true);
+       else
+               err = i40e_add_del_cloud_filter(vsi, filter, true);
+ 
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to add cloud filter, err %s\n",
+                       i40e_stat_str(&pf->hw, err));
+               err = i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
+               goto err;
         }
   
-       /* On the next run of the service_task, notify any clients of the new
-        * opened netdev
-        */
-       pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
-       i40e_service_event_schedule(pf);
+       /* add filter to the ordered list */
+       INIT_HLIST_NODE(&filter->cloud_node);
   
-       return 0;
+       hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list);
+ 
+       pf->num_cloud_filters++;
+ 
+       return err;
+ err:
+       kfree(filter);
+       return err;
   }
   
   /**
-  * i40e_vsi_reinit_locked - Reset the VSI
-  * @vsi: the VSI being configured
+  * i40e_find_cloud_filter - Find the cloud filter in the list
+  * @vsi: Pointer to VSI
+  * @cookie: filter specific cookie
    *
-  * Rebuild the ring structs after some configuration
-  * has changed, e.g. MTU size.
    **/
- static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+ static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi,
+                                                       unsigned long *cookie)
   {
-       struct i40e_pf *pf = vsi->back;
- 
-       WARN_ON(in_interrupt());
-       while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
-               usleep_range(1000, 2000);
-       i40e_down(vsi);
+       struct i40e_cloud_filter *filter = NULL;
+       struct hlist_node *node2;
   
-       i40e_up(vsi);
-       clear_bit(__I40E_CONFIG_BUSY, pf->state);
+       hlist_for_each_entry_safe(filter, node2,
+                                 &vsi->back->cloud_filter_list, cloud_node)
+               if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+                       return filter;
+       return NULL;
   }
   
   /**
-  * i40e_up - Bring the connection back up after being down
-  * @vsi: the VSI being configured
+  * i40e_delete_clsflower - Remove tc flower filters
+  * @vsi: Pointer to VSI
+  * @cls_flower: Pointer to struct tc_cls_flower_offload
+  *
    **/
- int i40e_up(struct i40e_vsi *vsi)
+ static int i40e_delete_clsflower(struct i40e_vsi *vsi,
+                                struct tc_cls_flower_offload *cls_flower)
   {
-       int err;
+       struct i40e_cloud_filter *filter = NULL;
+       struct i40e_pf *pf = vsi->back;
+       int err = 0;
   
-       err = i40e_vsi_configure(vsi);
-       if (!err)
-               err = i40e_up_complete(vsi);
+       filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie);
   
-       return err;
- }
+       if (!filter)
+               return -EINVAL;
   
- /**
-  * i40e_down - Shutdown the connection processing
-  * @vsi: the VSI being stopped
-  **/
- void i40e_down(struct i40e_vsi *vsi)
- {
-       int i;
+       hash_del(&filter->cloud_node);
   
-       /* It is assumed that the caller of this function
-        * sets the vsi->state __I40E_VSI_DOWN bit.
-        */
-       if (vsi->netdev) {
-               netif_carrier_off(vsi->netdev);
-               netif_tx_disable(vsi->netdev);
-       }
-       i40e_vsi_disable_irq(vsi);
-       i40e_vsi_stop_rings(vsi);
-       i40e_napi_disable_all(vsi);
+       if (filter->dst_port)
+               err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false);
+       else
+               err = i40e_add_del_cloud_filter(vsi, filter, false);
   
-       for (i = 0; i < vsi->num_queue_pairs; i++) {
-               i40e_clean_tx_ring(vsi->tx_rings[i]);
-               if (i40e_enabled_xdp_vsi(vsi))
-                       i40e_clean_tx_ring(vsi->xdp_rings[i]);
-               i40e_clean_rx_ring(vsi->rx_rings[i]);
+       kfree(filter);
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to delete cloud filter, err %s\n",
+                       i40e_stat_str(&pf->hw, err));
+               return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
         }
   
+       pf->num_cloud_filters--;
+       if (!pf->num_cloud_filters)
+               if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+                   !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+                       pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+                       pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+                       pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+               }
+       return 0;
   }
   
   /**
-  * i40e_setup_tc - configure multiple traffic classes
+  * i40e_setup_tc_cls_flower - flower classifier offloads
    * @netdev: net device to configure
-  * @tc: number of traffic classes to enable
+  * @type_data: offload data
    **/
- static int i40e_setup_tc(struct net_device *netdev, u8 tc)
+ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
+                                   struct tc_cls_flower_offload *cls_flower)
   {
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
         struct i40e_vsi *vsi = np->vsi;
-       struct i40e_pf *pf = vsi->back;
-       u8 enabled_tc = 0;
-       int ret = -EINVAL;
-       int i;
- 
-       /* Check if DCB enabled to continue */
-       if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
-               netdev_info(netdev, "DCB is not enabled for adapter\n");
-               goto exit;
-       }
   
-       /* Check if MFP enabled */
-       if (pf->flags & I40E_FLAG_MFP_ENABLED) {
-               netdev_info(netdev, "Configuring TC not supported in MFP mode\n");
-               goto exit;
-       }
+       if (cls_flower->common.chain_index)
+               return -EOPNOTSUPP;
   
-       /* Check whether tc count is within enabled limit */
-       if (tc > i40e_pf_get_num_tc(pf)) {
-               netdev_info(netdev, "TC count greater than enabled on link for adapter\n");
-               goto exit;
+       switch (cls_flower->command) {
+       case TC_CLSFLOWER_REPLACE:
+               return i40e_configure_clsflower(vsi, cls_flower);
+       case TC_CLSFLOWER_DESTROY:
+               return i40e_delete_clsflower(vsi, cls_flower);
+       case TC_CLSFLOWER_STATS:
+               return -EOPNOTSUPP;
+       default:
+               return -EINVAL;
         }
+ }
   
-       /* Generate TC map for number of tc requested */
-       for (i = 0; i < tc; i++)
-               enabled_tc |= BIT(i);
- 
-       /* Requesting same TC configuration as already enabled */
-       if (enabled_tc == vsi->tc_config.enabled_tc)
-               return 0;
+ static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+                                 void *cb_priv)
+ {
+       struct i40e_netdev_priv *np = cb_priv;
   
-       /* Quiesce VSI queues */
-       i40e_quiesce_vsi(vsi);
+       switch (type) {
+       case TC_SETUP_CLSFLOWER:
+               return i40e_setup_tc_cls_flower(np, type_data);
   
-       /* Configure VSI for enabled TCs */
-       ret = i40e_vsi_config_tc(vsi, enabled_tc);
-       if (ret) {
-               netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
-                           vsi->seid);
-               goto exit;
+       default:
+               return -EOPNOTSUPP;
         }
+ }
   
-       /* Unquiesce VSI */
-       i40e_unquiesce_vsi(vsi);
+ static int i40e_setup_tc_block(struct net_device *dev,
+                              struct tc_block_offload *f)
+ {
+       struct i40e_netdev_priv *np = netdev_priv(dev);
   
- exit:
-       return ret;
+       if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+ 
+       switch (f->command) {
+       case TC_BLOCK_BIND:
+               return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb,
+                                            np, np);
+       case TC_BLOCK_UNBIND:
+               tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
   }
   
   static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
                            void *type_data)
   {
-       struct tc_mqprio_qopt *mqprio = type_data;
- 
-       if (type != TC_SETUP_MQPRIO)
+       switch (type) {
+       case TC_SETUP_QDISC_MQPRIO:
+               return i40e_setup_tc(netdev, type_data);
+       case TC_SETUP_BLOCK:
+               return i40e_setup_tc_block(netdev, type_data);
+       default:
                 return -EOPNOTSUPP;
- 
-       mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- 
-       return i40e_setup_tc(netdev, mqprio->num_tc);
+       }
   }
   
   /**
@@@ -5747,7 -7674,7 +7674,7 @@@ err_setup_rx
   err_setup_tx:
         i40e_vsi_free_tx_resources(vsi);
         if (vsi == pf->vsi[pf->lan_vsi])
-               i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+               i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
   
         return err;
   }
@@@ -5809,6 -7736,33 +7736,33 @@@ static void i40e_fdir_filter_exit(struc
                                 I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
   }
   
+ /**
+  * i40e_cloud_filter_exit - Cleans up the cloud filters
+  * @pf: Pointer to PF
+  *
+  * This function destroys the hlist where all the cloud filters
+  * were saved.
+  **/
+ static void i40e_cloud_filter_exit(struct i40e_pf *pf)
+ {
+       struct i40e_cloud_filter *cfilter;
+       struct hlist_node *node;
+ 
+       hlist_for_each_entry_safe(cfilter, node,
+                                 &pf->cloud_filter_list, cloud_node) {
+               hlist_del(&cfilter->cloud_node);
+               kfree(cfilter);
+       }
+       pf->num_cloud_filters = 0;
+ 
+       if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+           !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+               pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+               pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+               pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+       }
+ }
+ 
   /**
    * i40e_close - Disables a network interface
    * @netdev: network interface device structure
@@@ -5875,7 -7829,7 +7829,7 @@@ void i40e_do_reset(struct i40e_pf *pf, 
                 wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
                 i40e_flush(&pf->hw);
   
-       } else if (reset_flags & BIT_ULL(__I40E_PF_RESET_REQUESTED)) {
+       } else if (reset_flags & I40E_PF_RESET_FLAG) {
   
                 /* Request a PF Reset
                  *
@@@ -6226,6 -8180,7 +8180,7 @@@ void i40e_fdir_check_and_reenable(struc
                                 hlist_del(&filter->fdir_node);
                                 kfree(filter);
                                 pf->fdir_pf_active_filters--;
+                               pf->fd_inv = 0;
                         }
                 }
         }
@@@ -6429,8 -8384,7 +8384,7 @@@ static void i40e_link_event(struct i40e
              new_link == netif_carrier_ok(vsi->netdev)))
                 return;
   
-       if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-               i40e_print_link_message(vsi, new_link);
+       i40e_print_link_message(vsi, new_link);
   
         /* Notify the base of the switch tree connected to
          * the link.  Floating VEBs are not notified.
@@@ -6553,12 -8507,26 +8507,26 @@@ static void i40e_handle_link_event(stru
          */
         i40e_link_event(pf);
   
-       /* check for unqualified module, if link is down */
-       if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
-           (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
-           (!(status->link_info & I40E_AQ_LINK_UP)))
+       /* Check if module meets thermal requirements */
+       if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
                 dev_err(&pf->pdev->dev,
-                       "The driver failed to link because an unqualified module was detected.\n");
+                       "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
+               dev_err(&pf->pdev->dev,
+                       "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+       } else {
+               /* check for unqualified module, if link is down, suppress
+                * the message if link was forced to be down.
+                */
+               if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+                   (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
+                   (!(status->link_info & I40E_AQ_LINK_UP)) &&
+                   (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
+                       dev_err(&pf->pdev->dev,
+                               "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
+                       dev_err(&pf->pdev->dev,
+                               "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+               }
+       }
   }
   
   /**
@@@ -6900,7 -8868,8 +8868,8 @@@ end_reconstitute
    * i40e_get_capabilities - get info about the HW
    * @pf: the PF struct
    **/
- static int i40e_get_capabilities(struct i40e_pf *pf)
+ static int i40e_get_capabilities(struct i40e_pf *pf,
+                                enum i40e_admin_queue_opc list_type)
   {
         struct i40e_aqc_list_capabilities_element_resp *cap_buf;
         u16 data_size;
@@@ -6915,9 -8884,8 +8884,8 @@@
   
                 /* this loads the data into the hw struct for us */
                 err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
-                                           &data_size,
-                                           i40e_aqc_opc_list_func_capabilities,
-                                           NULL);
+                                                   &data_size, list_type,
+                                                   NULL);
                 /* data loaded, buffer no longer needed */
                 kfree(cap_buf);
   
@@@ -6934,26 -8902,44 +8902,44 @@@
                 }
         } while (err);
   
-       if (pf->hw.debug_mask & I40E_DEBUG_USER)
-               dev_info(&pf->pdev->dev,
-                        "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
-                        pf->hw.pf_id, pf->hw.func_caps.num_vfs,
-                        pf->hw.func_caps.num_msix_vectors,
-                        pf->hw.func_caps.num_msix_vectors_vf,
-                        pf->hw.func_caps.fd_filters_guaranteed,
-                        pf->hw.func_caps.fd_filters_best_effort,
-                        pf->hw.func_caps.num_tx_qp,
-                        pf->hw.func_caps.num_vsis);
- 
+       if (pf->hw.debug_mask & I40E_DEBUG_USER) {
+               if (list_type == i40e_aqc_opc_list_func_capabilities) {
+                       dev_info(&pf->pdev->dev,
+                                "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
+                                pf->hw.pf_id, pf->hw.func_caps.num_vfs,
+                                pf->hw.func_caps.num_msix_vectors,
+                                pf->hw.func_caps.num_msix_vectors_vf,
+                                pf->hw.func_caps.fd_filters_guaranteed,
+                                pf->hw.func_caps.fd_filters_best_effort,
+                                pf->hw.func_caps.num_tx_qp,
+                                pf->hw.func_caps.num_vsis);
+               } else if (list_type == i40e_aqc_opc_list_dev_capabilities) {
+                       dev_info(&pf->pdev->dev,
+                                "switch_mode=0x%04x, function_valid=0x%08x\n",
+                                pf->hw.dev_caps.switch_mode,
+                                pf->hw.dev_caps.valid_functions);
+                       dev_info(&pf->pdev->dev,
+                                "SR-IOV=%d, num_vfs for all function=%u\n",
+                                pf->hw.dev_caps.sr_iov_1_1,
+                                pf->hw.dev_caps.num_vfs);
+                       dev_info(&pf->pdev->dev,
+                                "num_vsis=%u, num_rx:%u, num_tx=%u\n",
+                                pf->hw.dev_caps.num_vsis,
+                                pf->hw.dev_caps.num_rx_qp,
+                                pf->hw.dev_caps.num_tx_qp);
+               }
+       }
+       if (list_type == i40e_aqc_opc_list_func_capabilities) {
   #define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \
                        + pf->hw.func_caps.num_vfs)
-       if (pf->hw.revision_id == 0 && (DEF_NUM_VSI > pf->hw.func_caps.num_vsis)) {
-               dev_info(&pf->pdev->dev,
-                        "got num_vsis %d, setting num_vsis to %d\n",
-                        pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
-               pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+               if (pf->hw.revision_id == 0 &&
+                   pf->hw.func_caps.num_vsis < DEF_NUM_VSI) {
+                       dev_info(&pf->pdev->dev,
+                                "got num_vsis %d, setting num_vsis to %d\n",
+                                pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
+                       pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+               }
         }
- 
         return 0;
   }
   
@@@ -6985,35 -8971,125 +8971,125 @@@ static void i40e_fdir_sb_setup(struct i
         if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
                 return;
   
-       /* find existing VSI and see if it needs configuring */
-       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+       /* find existing VSI and see if it needs configuring */
+       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+ 
+       /* create a new VSI if none exists */
+       if (!vsi) {
+               vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
+                                    pf->vsi[pf->lan_vsi]->seid, 0);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
+                       pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+                       pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+                       return;
+               }
+       }
+ 
+       i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
+ }
+ 
+ /**
+  * i40e_fdir_teardown - release the Flow Director resources
+  * @pf: board private structure
+  **/
+ static void i40e_fdir_teardown(struct i40e_pf *pf)
+ {
+       struct i40e_vsi *vsi;
+ 
+       i40e_fdir_filter_exit(pf);
+       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+       if (vsi)
+               i40e_vsi_release(vsi);
+ }
+ 
+ /**
+  * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs
+  * @vsi: PF main vsi
+  * @seid: seid of main or channel VSIs
+  *
+  * Rebuilds cloud filters associated with main VSI and channel VSIs if they
+  * existed before reset
+  **/
+ static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
+ {
+       struct i40e_cloud_filter *cfilter;
+       struct i40e_pf *pf = vsi->back;
+       struct hlist_node *node;
+       i40e_status ret;
+ 
+       /* Add cloud filters back if they exist */
+       hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
+                                 cloud_node) {
+               if (cfilter->seid != seid)
+                       continue;
+ 
+               if (cfilter->dst_port)
+                       ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+                                                               true);
+               else
+                       ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
   
-       /* create a new VSI if none exists */
-       if (!vsi) {
-               vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
-                                    pf->vsi[pf->lan_vsi]->seid, 0);
-               if (!vsi) {
-                       dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
-                       pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
-                       return;
+               if (ret) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Failed to rebuild cloud filter, err %s aq_err %s\n",
+                               i40e_stat_str(&pf->hw, ret),
+                               i40e_aq_str(&pf->hw,
+                                           pf->hw.aq.asq_last_status));
+                       return ret;
                 }
         }
- 
-       i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
+       return 0;
   }
   
   /**
-  * i40e_fdir_teardown - release the Flow Director resources
-  * @pf: board private structure
+  * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
+  * @vsi: PF main vsi
+  *
+  * Rebuilds channel VSIs if they existed before reset
    **/
- static void i40e_fdir_teardown(struct i40e_pf *pf)
+ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
   {
-       struct i40e_vsi *vsi;
+       struct i40e_channel *ch, *ch_tmp;
+       i40e_status ret;
   
-       i40e_fdir_filter_exit(pf);
-       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
-       if (vsi)
-               i40e_vsi_release(vsi);
+       if (list_empty(&vsi->ch_list))
+               return 0;
+ 
+       list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+               if (!ch->initialized)
+                       break;
+               /* Proceed with creation of channel (VMDq2) VSI */
+               ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch);
+               if (ret) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "failed to rebuild channels using uplink_seid %u\n",
+                                vsi->uplink_seid);
+                       return ret;
+               }
+               if (ch->max_tx_rate) {
+                       u64 credits = ch->max_tx_rate;
+ 
+                       if (i40e_set_bw_limit(vsi, ch->seid,
+                                             ch->max_tx_rate))
+                               return -EINVAL;
+ 
+                       do_div(credits, I40E_BW_CREDIT_DIVISOR);
+                       dev_dbg(&vsi->back->pdev->dev,
+                               "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+                               ch->max_tx_rate,
+                               credits,
+                               ch->seid);
+               }
+               ret = i40e_rebuild_cloud_filters(vsi, ch->seid);
+               if (ret) {
+                       dev_dbg(&vsi->back->pdev->dev,
+                               "Failed to rebuild cloud filters for channel VSI %u\n",
+                               ch->seid);
+                       return ret;
+               }
+       }
+       return 0;
   }
   
   /**
@@@ -7152,6 -9228,7 +9228,7 @@@ static int i40e_reset(struct i40e_pf *p
    **/
   static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
   {
+       struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
         struct i40e_hw *hw = &pf->hw;
         u8 set_fc_aq_fail = 0;
         i40e_status ret;
@@@ -7177,7 -9254,7 +9254,7 @@@
                 i40e_verify_eeprom(pf);
   
         i40e_clear_pxe_mode(hw);
-       ret = i40e_get_capabilities(pf);
+       ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
         if (ret)
                 goto end_core_reset;
   
@@@ -7234,7 -9311,7 +9311,7 @@@
          * If there were VEBs but the reconstitution failed, we'll try
          * try to recover minimal use by getting the basic PF VSI working.
          */
-       if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) {
+       if (vsi->uplink_seid != pf->mac_seid) {
                 dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
                 /* find the one VEB connected to the MAC, and find orphans */
                 for (v = 0; v < I40E_MAX_VEB; v++) {
@@@ -7258,8 -9335,7 +9335,7 @@@
                                         dev_info(&pf->pdev->dev,
                                                  "rebuild of switch failed: %d, will try to set up simple PF connection\n",
                                                  ret);
-                                       pf->vsi[pf->lan_vsi]->uplink_seid
-                                                               = pf->mac_seid;
+                                       vsi->uplink_seid = pf->mac_seid;
                                         break;
                                 } else if (pf->veb[v]->uplink_seid == 0) {
                                         dev_info(&pf->pdev->dev,
@@@ -7270,10 -9346,10 +9346,10 @@@
                 }
         }
   
-       if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) {
+       if (vsi->uplink_seid == pf->mac_seid) {
                 dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
                 /* no VEB, so rebuild only the Main VSI */
-               ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]);
+               ret = i40e_add_vsi(vsi);
                 if (ret) {
                         dev_info(&pf->pdev->dev,
                                  "rebuild of Main VSI failed: %d\n", ret);
@@@ -7281,6 -9357,35 +9357,35 @@@
                 }
         }
   
+       if (vsi->mqprio_qopt.max_rate[0]) {
+               u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+               u64 credits = 0;
+ 
+               do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+               ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+               if (ret)
+                       goto end_unlock;
+ 
+               credits = max_tx_rate;
+               do_div(credits, I40E_BW_CREDIT_DIVISOR);
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+                       max_tx_rate,
+                       credits,
+                       vsi->seid);
+       }
+ 
+       ret = i40e_rebuild_cloud_filters(vsi, vsi->seid);
+       if (ret)
+               goto end_unlock;
+ 
+       /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs
+        * for this main VSI if they exist
+        */
+       ret = i40e_rebuild_channels(vsi);
+       if (ret)
+               goto end_unlock;
+ 
         /* Reconfigure hardware for allowing smaller MSS in the case
          * of TSO, so that we avoid the MDD being fired and causing
          * a reset in the case of small MSS+TSO.
@@@ -7615,9 -9720,9 +9720,9 @@@ static void i40e_service_task(struct wo
    * i40e_service_timer - timer callback
    * @data: pointer to PF struct
    **/
- static void i40e_service_timer(unsigned long data)
+ static void i40e_service_timer(struct timer_list *t)
   {
-       struct i40e_pf *pf = (struct i40e_pf *)data;
+       struct i40e_pf *pf = from_timer(pf, t, service_timer);
   
         mod_timer(&pf->service_timer,
                   round_jiffies(jiffies + pf->service_timer_period));
@@@ -7674,7 -9779,7 +9779,7 @@@ static int i40e_set_num_rings_in_vsi(st
   
   /**
    * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
-  * @type: VSI pointer
+  * @vsi: VSI pointer
    * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
    *
    * On error: returns error code (negative)
@@@ -8139,7 -10244,7 +10244,7 @@@ static int i40e_init_msix(struct i40e_p
                 pf->num_lan_qps = 1;
                 pf->num_lan_msix = 1;
   
-       } else if (!vectors_left) {
+       } else if (v_actual != v_budget) {
                 /* If we have limited resources, we will start with no vectors
                  * for the special features and then allocate vectors to some
                  * of these features based on the policy and at the end disable
@@@ -8148,7 -10253,8 +10253,8 @@@
                 int vec;
   
                 dev_info(&pf->pdev->dev,
-                        "MSI-X vector limit reached, attempting to redistribute vectors\n");
+                        "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n",
+                        v_actual, v_budget);
                 /* reserve the misc vector */
                 vec = v_actual - 1;
   
@@@ -8196,6 -10302,7 +10302,7 @@@
             (pf->num_fdsb_msix == 0)) {
                 dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
                 pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+               pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
         }
         if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
             (pf->num_vmdq_msix == 0)) {
@@@ -8313,6 -10420,7 +10420,7 @@@ static int i40e_init_interrupt_scheme(s
                                        I40E_FLAG_FD_SB_ENABLED  |
                                        I40E_FLAG_FD_ATR_ENABLED |
                                        I40E_FLAG_VMDQ_ENABLED);
+                       pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
   
                         /* rework the queue expectations without MSIX */
                         i40e_determine_queue_usage(pf);
@@@ -8350,6 -10458,55 +10458,55 @@@
         return 0;
   }
   
+ /**
+  * i40e_restore_interrupt_scheme - Restore the interrupt scheme
+  * @pf: private board data structure
+  *
+  * Restore the interrupt scheme that was cleared when we suspended the
+  * device. This should be called during resume to re-allocate the q_vectors
+  * and reacquire IRQs.
+  */
+ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
+ {
+       int err, i;
+ 
+       /* We cleared the MSI and MSI-X flags when disabling the old interrupt
+        * scheme. We need to re-enabled them here in order to attempt to
+        * re-acquire the MSI or MSI-X vectors
+        */
+       pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+ 
+       err = i40e_init_interrupt_scheme(pf);
+       if (err)
+               return err;
+ 
+       /* Now that we've re-acquired IRQs, we need to remap the vectors and
+        * rings together again.
+        */
+       for (i = 0; i < pf->num_alloc_vsi; i++) {
+               if (pf->vsi[i]) {
+                       err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
+                       if (err)
+                               goto err_unwind;
+                       i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
+               }
+       }
+ 
+       err = i40e_setup_misc_vector(pf);
+       if (err)
+               goto err_unwind;
+ 
+       return 0;
+ 
+ err_unwind:
+       while (i--) {
+               if (pf->vsi[i])
+                       i40e_vsi_free_q_vectors(pf->vsi[i]);
+       }
+ 
+       return err;
+ }
+ 
   /**
    * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
    * @pf: board private structure
@@@ -8363,13 -10520,12 +10520,12 @@@ static int i40e_setup_misc_vector(struc
         struct i40e_hw *hw = &pf->hw;
         int err = 0;
   
-       /* Only request the irq if this is the first time through, and
-        * not when we're rebuilding after a Reset
-        */
-       if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) {
+       /* Only request the IRQ once, the first time through. */
+       if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
                 err = request_irq(pf->msix_entries[0].vector,
                                   i40e_intr, 0, pf->int_name, pf);
                 if (err) {
+                       clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
                         dev_info(&pf->pdev->dev,
                                  "request_irq for %s failed: %d\n",
                                  pf->int_name, err);
@@@ -8385,50 -10541,11 +10541,11 @@@
   
         i40e_flush(hw);
   
-       i40e_irq_dynamic_enable_icr0(pf, true);
+       i40e_irq_dynamic_enable_icr0(pf);
   
         return err;
   }
   
- /**
-  * i40e_config_rss_aq - Prepare for RSS using AQ commands
-  * @vsi: vsi structure
-  * @seed: RSS hash seed
-  **/
- static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
-                             u8 *lut, u16 lut_size)
- {
-       struct i40e_pf *pf = vsi->back;
-       struct i40e_hw *hw = &pf->hw;
-       int ret = 0;
- 
-       if (seed) {
-               struct i40e_aqc_get_set_rss_key_data *seed_dw =
-                       (struct i40e_aqc_get_set_rss_key_data *)seed;
-               ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
-               if (ret) {
-                       dev_info(&pf->pdev->dev,
-                                "Cannot set RSS key, err %s aq_err %s\n",
-                                i40e_stat_str(hw, ret),
-                                i40e_aq_str(hw, hw->aq.asq_last_status));
-                       return ret;
-               }
-       }
-       if (lut) {
-               bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
- 
-               ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
-               if (ret) {
-                       dev_info(&pf->pdev->dev,
-                                "Cannot set RSS lut, err %s aq_err %s\n",
-                                i40e_stat_str(hw, ret),
-                                i40e_aq_str(hw, hw->aq.asq_last_status));
-                       return ret;
-               }
-       }
-       return ret;
- }
- 
   /**
    * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
    * @vsi: Pointer to vsi structure
@@@ -8475,46 -10592,6 +10592,6 @@@ static int i40e_get_rss_aq(struct i40e_
         return ret;
   }
   
- /**
-  * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
-  * @vsi: VSI structure
-  **/
- static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
- {
-       u8 seed[I40E_HKEY_ARRAY_SIZE];
-       struct i40e_pf *pf = vsi->back;
-       u8 *lut;
-       int ret;
- 
-       if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
-               return 0;
- 
-       if (!vsi->rss_size)
-               vsi->rss_size = min_t(int, pf->alloc_rss_size,
-                                     vsi->num_queue_pairs);
-       if (!vsi->rss_size)
-               return -EINVAL;
- 
-       lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
-       if (!lut)
-               return -ENOMEM;
-       /* Use the user configured hash keys and lookup table if there is one,
-        * otherwise use default
-        */
-       if (vsi->rss_lut_user)
-               memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
-       else
-               i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
-       if (vsi->rss_hkey_user)
-               memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
-       else
-               netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
-       ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
-       kfree(lut);
- 
-       return ret;
- }
- 
   /**
    * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
    * @vsi: Pointer to vsi structure
@@@ -8913,8 -10990,8 +10990,8 @@@ static int i40e_sw_init(struct i40e_pf 
                     I40E_FLAG_MSIX_ENABLED;
   
         /* Set default ITR */
-       pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
-       pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF;
+       pf->rx_itr_default = I40E_ITR_RX_DEF;
+       pf->tx_itr_default = I40E_ITR_TX_DEF;
   
         /* Depending on PF configurations, it is possible that the RSS
          * maximum might end up larger than the available queues
@@@ -9014,6 -11091,11 +11091,11 @@@
             (pf->hw.aq.fw_maj_ver >= 5)))
                 pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
   
+       /* Enable PTP L4 if FW > v6.0 */
+       if (pf->hw.mac.type == I40E_MAC_XL710 &&
+           pf->hw.aq.fw_maj_ver >= 6)
+               pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
+ 
         if (pf->hw.func_caps.vmdq) {
                 pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
                 pf->flags |= I40E_FLAG_VMDQ_ENABLED;
@@@ -9079,9 -11161,13 +11161,13 @@@ bool i40e_set_ntuple(struct i40e_pf *pf
                 /* Enable filters and mark for reset */
                 if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
                         need_reset = true;
-               /* enable FD_SB only if there is MSI-X vector */
-               if (pf->num_fdsb_msix > 0)
+               /* enable FD_SB only if there is MSI-X vector and no cloud
+                * filters exist
+                */
+               if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
                         pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+                       pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+               }
         } else {
                 /* turn off filters, mark for reset and clear SW filter list */
                 if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
@@@ -9090,6 -11176,8 +11176,8 @@@
                 }
                 pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED |
                                I40E_FLAG_FD_SB_AUTO_DISABLED);
+               pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+ 
                 /* reset fd counters */
                 pf->fd_add_err = 0;
                 pf->fd_atr_cnt = 0;
@@@ -9151,10 -11239,16 +11239,16 @@@ static int i40e_set_features(struct net
         else
                 i40e_vlan_stripping_disable(vsi);
   
+       if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
+               dev_err(&pf->pdev->dev,
+                       "Offloaded tc filters active, can't turn hw_tc_offload off");
+               return -EINVAL;
+       }
+ 
         need_reset = i40e_set_ntuple(pf, features);
   
         if (need_reset)
-               i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+               i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
   
         return 0;
   }
@@@ -9406,8 -11500,7 +11500,7 @@@ static int i40e_ndo_bridge_setlink(stru
                                 pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
                         else
                                 pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
-                       i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED),
-                                     true);
+                       i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
                         break;
                 }
         }
@@@ -9555,12 -11648,12 +11648,12 @@@ static int i40e_xdp_setup(struct i40e_v
   }
   
   /**
-  * i40e_xdp - implements ndo_xdp for i40e
+  * i40e_xdp - implements ndo_bpf for i40e
    * @dev: netdevice
    * @xdp: XDP command
    **/
   static int i40e_xdp(struct net_device *dev,
-                   struct netdev_xdp *xdp)
+                   struct netdev_bpf *xdp)
   {
         struct i40e_netdev_priv *np = netdev_priv(dev);
         struct i40e_vsi *vsi = np->vsi;
@@@ -9612,7 -11705,7 +11705,7 @@@ static const struct net_device_ops i40e
         .ndo_features_check     = i40e_features_check,
         .ndo_bridge_getlink     = i40e_ndo_bridge_getlink,
         .ndo_bridge_setlink     = i40e_ndo_bridge_setlink,
-       .ndo_xdp                = i40e_xdp,
+       .ndo_bpf                = i40e_xdp,
   };
   
   /**
@@@ -9671,7 -11764,8 +11764,8 @@@ static int i40e_config_netdev(struct i4
         netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
   
         if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
-               netdev->hw_features |= NETIF_F_NTUPLE;
+               netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+ 
         hw_features = hw_enc_features           |
                       NETIF_F_HW_VLAN_CTAG_TX   |
                       NETIF_F_HW_VLAN_CTAG_RX;
@@@ -9849,6 -11943,31 +11943,31 @@@ static int i40e_add_vsi(struct i40e_vs
   
                 enabled_tc = i40e_pf_get_tc_map(pf);
   
+               /* Source pruning is enabled by default, so the flag is
+                * negative logic - if it's set, we need to fiddle with
+                * the VSI to disable source pruning.
+                */
+               if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
+                       memset(&ctxt, 0, sizeof(ctxt));
+                       ctxt.seid = pf->main_vsi_seid;
+                       ctxt.pf_num = pf->hw.pf_id;
+                       ctxt.vf_num = 0;
+                       ctxt.info.valid_sections |=
+                                    cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+                       ctxt.info.switch_id =
+                                  cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
+                       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "update vsi failed, err %s aq_err %s\n",
+                                        i40e_stat_str(&pf->hw, ret),
+                                        i40e_aq_str(&pf->hw,
+                                                    pf->hw.aq.asq_last_status));
+                               ret = -ENOENT;
+                               goto err;
+                       }
+               }
+ 
                 /* MFP mode setup queue map and update VSI */
                 if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
                     !(pf->hw.func_caps.iscsi)) { /* NIC type PF */
@@@ -10951,14 -13070,16 +13070,16 @@@ static int i40e_setup_pf_switch(struct 
         */
   
         if ((pf->hw.pf_id == 0) &&
-           !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
+           !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
                 flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+               pf->last_sw_conf_flags = flags;
+       }
   
         if (pf->hw.pf_id == 0) {
                 u16 valid_flags;
   
                 valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
-               ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags,
+               ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
                                                 NULL);
                 if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
                         dev_info(&pf->pdev->dev,
@@@ -10968,6 -13089,7 +13089,7 @@@
                                              pf->hw.aq.asq_last_status));
                         /* not a fatal problem, just keep going */
                 }
+               pf->last_sw_conf_valid_flags = valid_flags;
         }
   
         /* first time setup */
@@@ -10988,6 -13110,7 +13110,7 @@@
                         vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
                 if (!vsi) {
                         dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
+                       i40e_cloud_filter_exit(pf);
                         i40e_fdir_teardown(pf);
                         return -EAGAIN;
                 }
@@@ -11039,6 -13162,7 +13162,7 @@@
   static void i40e_determine_queue_usage(struct i40e_pf *pf)
   {
         int queues_left;
+       int q_max;
   
         pf->num_lan_qps = 0;
   
@@@ -11063,6 -13187,7 +13187,7 @@@
                                I40E_FLAG_DCB_ENABLED    |
                                I40E_FLAG_SRIOV_ENABLED  |
                                I40E_FLAG_VMDQ_ENABLED);
+               pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
         } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
                                   I40E_FLAG_FD_SB_ENABLED |
                                   I40E_FLAG_FD_ATR_ENABLED |
@@@ -11077,6 -13202,7 +13202,7 @@@
                                I40E_FLAG_FD_ATR_ENABLED |
                                I40E_FLAG_DCB_ENABLED    |
                                I40E_FLAG_VMDQ_ENABLED);
+               pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
         } else {
                 /* Not enough queues for all TCs */
                 if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
@@@ -11085,10 -13211,12 +13211,12 @@@
                                         I40E_FLAG_DCB_ENABLED);
                         dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
                 }
-               pf->num_lan_qps = max_t(int, pf->rss_size_max,
-                                       num_online_cpus());
-               pf->num_lan_qps = min_t(int, pf->num_lan_qps,
-                                       pf->hw.func_caps.num_tx_qp);
+ 
+               /* limit lan qps to the smaller of qps, cpus or msix */
+               q_max = max_t(int, pf->rss_size_max, num_online_cpus());
+               q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
+               q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
+               pf->num_lan_qps = q_max;
   
                 queues_left -= pf->num_lan_qps;
         }
@@@ -11098,6 -13226,7 +13226,7 @@@
                         queues_left -= 1; /* save 1 queue for FD */
                 } else {
                         pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+                       pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
                         dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
                 }
         }
@@@ -11304,6 -13433,13 +13433,13 @@@ static int i40e_probe(struct pci_dev *p
         hw->bus.bus_id = pdev->bus->number;
         pf->instance = pfs_found;
   
+       /* Select something other than the 802.1ad ethertype for the
+        * switch to use internally and drop on ingress.
+        */
+       hw->switch_tag = 0xffff;
+       hw->first_tag = ETH_P_8021AD;
+       hw->second_tag = ETH_P_8021Q;
+ 
         INIT_LIST_HEAD(&pf->l3_flex_pit_list);
         INIT_LIST_HEAD(&pf->l4_flex_pit_list);
   
@@@ -11380,11 -13516,10 +13516,10 @@@
                  i40e_nvm_version_str(hw));
   
         if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
-           hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR)
+           hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
                 dev_info(&pdev->dev,
                          "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
-       else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR ||
-                hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1))
+       else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
                 dev_info(&pdev->dev,
                          "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
   
@@@ -11395,7 -13530,7 +13530,7 @@@
                 dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
   
         i40e_clear_pxe_mode(hw);
-       err = i40e_get_capabilities(pf);
+       err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
         if (err)
                 goto err_adminq_setup;
   
@@@ -11454,7 -13589,7 +13589,7 @@@
   #endif /* CONFIG_I40E_DCB */
   
         /* set up periodic task facility */
-       setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf);
+       timer_setup(&pf->service_timer, i40e_service_timer, 0);
         pf->service_timer_period = HZ;
   
         INIT_WORK(&pf->service_task, i40e_service_task);
@@@ -11506,6 -13641,7 +13641,7 @@@
                 dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
                 goto err_vsis;
         }
+       INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
   
         /* Make sure flow control is set according to current settings */
         err = i40e_set_fc(hw, &set_fc_aq_fail, true);
@@@ -11777,7 -13913,7 +13913,7 @@@ static void i40e_remove(struct pci_dev 
         /* no more scheduling of any task */
         set_bit(__I40E_SUSPENDED, pf->state);
         set_bit(__I40E_DOWN, pf->state);
-       if (pf->service_timer.data)
+       if (pf->service_timer.function)
                 del_timer_sync(&pf->service_timer);
         if (pf->service_task.func)
                 cancel_work_sync(&pf->service_task);
@@@ -11812,6 -13948,8 +13948,8 @@@
         if (pf->vsi[pf->lan_vsi])
                 i40e_vsi_release(pf->vsi[pf->lan_vsi]);
   
+       i40e_cloud_filter_exit(pf);
+ 
         /* remove attached clients */
         if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
                 ret_code = i40e_lan_del_device(pf);
@@@ -11936,6 -14074,28 +14074,28 @@@ static pci_ers_result_t i40e_pci_error_
         return result;
   }
   
+ /**
+  * i40e_pci_error_reset_prepare - get the PF ready for an upcoming PCI reset
+  * @pdev: PCI device information struct
+  *
+  * Fetches the PF stored as driver data on @pdev and passes it to
+  * i40e_prep_for_reset() with the second argument set to false.
+  */
+ static void i40e_pci_error_reset_prepare(struct pci_dev *pdev)
+ {
+       i40e_prep_for_reset(pci_get_drvdata(pdev), false);
+ }
+ 
+ /**
+  * i40e_pci_error_reset_done - PCI reset finished, rebuild the PF
+  * @pdev: PCI device information struct
+  *
+  * Fetches the PF stored as driver data on @pdev and passes it to
+  * i40e_reset_and_rebuild() with both boolean arguments set to false.
+  */
+ static void i40e_pci_error_reset_done(struct pci_dev *pdev)
+ {
+       i40e_reset_and_rebuild(pci_get_drvdata(pdev), false, false);
+ }
+ 
   /**
    * i40e_pci_error_resume - restart operations after PCI error recovery
    * @pdev: PCI device information struct
@@@ -12021,6 -14181,7 +14181,7 @@@ static void i40e_shutdown(struct pci_de
   
         del_timer_sync(&pf->service_timer);
         cancel_work_sync(&pf->service_task);
+       i40e_cloud_filter_exit(pf);
         i40e_fdir_teardown(pf);
   
         /* Client close must be called explicitly here because the timer
@@@ -12046,20 -14207,26 +14207,26 @@@
         }
   }
   
- #ifdef CONFIG_PM
   /**
-  * i40e_suspend - PCI callback for moving to D3
-  * @pdev: PCI device information struct
+  * i40e_suspend - PM callback for moving to D3
+  * @dev: generic device information structure
    **/
- static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
+ static int __maybe_unused i40e_suspend(struct device *dev)
   {
+       struct pci_dev *pdev = to_pci_dev(dev);
         struct i40e_pf *pf = pci_get_drvdata(pdev);
         struct i40e_hw *hw = &pf->hw;
-       int retval = 0;
   
-       set_bit(__I40E_SUSPENDED, pf->state);
+       /* If we're already suspended, then there is nothing to do */
+       if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
+               return 0;
+ 
         set_bit(__I40E_DOWN, pf->state);
   
+       /* Ensure service task will not be running */
+       del_timer_sync(&pf->service_timer);
+       cancel_work_sync(&pf->service_task);
+ 
         if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
                 i40e_enable_mc_magic_wake(pf);
   
@@@ -12068,81 -14235,70 +14235,70 @@@
         wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
         wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
   
-       i40e_stop_misc_vector(pf);
-       if (pf->msix_entries) {
-               synchronize_irq(pf->msix_entries[0].vector);
-               free_irq(pf->msix_entries[0].vector, pf);
-       }
-       retval = pci_save_state(pdev);
-       if (retval)
-               return retval;
- 
-       pci_wake_from_d3(pdev, pf->wol_en);
-       pci_set_power_state(pdev, PCI_D3hot);
+       /* Clear the interrupt scheme and release our IRQs so that the system
+        * can safely hibernate even when there are a large number of CPUs.
+        * Otherwise hibernation might fail when mapping all the vectors back
+        * to CPU0.
+        */
+       i40e_clear_interrupt_scheme(pf);
   
-       return retval;
+       return 0;
   }
   
   /**
-  * i40e_resume - PCI callback for waking up from D3
-  * @pdev: PCI device information struct
+  * i40e_resume - PM callback for waking up from D3
+  * @dev: generic device information structure
    **/
- static int i40e_resume(struct pci_dev *pdev)
+ static int __maybe_unused i40e_resume(struct device *dev)
   {
+       struct pci_dev *pdev = to_pci_dev(dev);
         struct i40e_pf *pf = pci_get_drvdata(pdev);
-       u32 err;
+       int err;
   
-       pci_set_power_state(pdev, PCI_D0);
-       pci_restore_state(pdev);
-       /* pci_restore_state() clears dev->state_saves, so
-        * call pci_save_state() again to restore it.
-        */
-       pci_save_state(pdev);
+       /* If we're not suspended, then there is nothing to do */
+       if (!test_bit(__I40E_SUSPENDED, pf->state))
+               return 0;
   
-       err = pci_enable_device_mem(pdev);
+       /* We cleared the interrupt scheme when we suspended, so we need to
+        * restore it now to resume device functionality.
+        */
+       err = i40e_restore_interrupt_scheme(pf);
         if (err) {
-               dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
-               return err;
+               dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n",
+                       err);
         }
-       pci_set_master(pdev);
   
-       /* no wakeup events while running */
-       pci_wake_from_d3(pdev, false);
- 
-       /* handling the reset will rebuild the device state */
-       if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
-               clear_bit(__I40E_DOWN, pf->state);
-               if (pf->msix_entries) {
-                       err = request_irq(pf->msix_entries[0].vector,
-                                         i40e_intr, 0, pf->int_name, pf);
-                       if (err) {
-                               dev_err(&pf->pdev->dev,
-                                       "request_irq for %s failed: %d\n",
-                                       pf->int_name, err);
-                       }
-               }
-               i40e_reset_and_rebuild(pf, false, false);
-       }
+       clear_bit(__I40E_DOWN, pf->state);
+       i40e_reset_and_rebuild(pf, false, false);
+ 
+       /* Clear suspended state last after everything is recovered */
+       clear_bit(__I40E_SUSPENDED, pf->state);
+ 
+       /* Restart the service task */
+       mod_timer(&pf->service_timer,
+                 round_jiffies(jiffies + pf->service_timer_period));
   
         return 0;
   }
   
- #endif
   static const struct pci_error_handlers i40e_err_handler = {
         .error_detected = i40e_pci_error_detected,
         .slot_reset = i40e_pci_error_slot_reset,
+       .reset_prepare = i40e_pci_error_reset_prepare,
+       .reset_done = i40e_pci_error_reset_done,
         .resume = i40e_pci_error_resume,
   };
   
+ static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume);
+ 
   static struct pci_driver i40e_driver = {
         .name     = i40e_driver_name,
         .id_table = i40e_pci_tbl,
         .probe    = i40e_probe,
         .remove   = i40e_remove,
- #ifdef CONFIG_PM
-       .suspend  = i40e_suspend,
-       .resume   = i40e_resume,
- #endif
+       .driver   = {
+               .pm = &i40e_pm_ops,
+       },
         .shutdown = i40e_shutdown,
         .err_handler = &i40e_err_handler,
         .sriov_configure = i40e_pci_sriov_configure,
diff --combined drivers/net/ethernet/intel/igb/e1000_regs.h

index 31a3f09df9f75fee5ab62472c64fb07446408f61,8eee081d395f97a77363a9b1965cea223941d1d5..568c96842f2818b1e72de9fd0a19a2625070e363
--- 1/drivers/net/ethernet/intel/igb/e1000_regs.h
--- 2/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@@ -375,7 -375,7 +375,7 @@@ u32 igb_rd32(struct e1000_hw *hw, u32 r
   /* write operations, indexed using DWORDS */
   #define wr32(reg, val) \
   do { \
- -      u8 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \
+ +      u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
         if (!E1000_REMOVED(hw_addr)) \
                 writel((val), &hw_addr[(reg)]); \
   } while (0)
@@@ -421,6 -421,14 +421,14 @@@
   
   #define E1000_I210_FLA                0x1201C
   
+ /* I210-specific register offsets.  The (_n) forms address one register
+  * out of an array laid out with a 0x40-byte stride; _n is presumably the
+  * Tx queue number - confirm against the callers.
+  */
+ #define E1000_I210_DTXMXPKTSZ 0x355C
+ 
+ #define E1000_I210_TXDCTL(_n) (0x0E028 + ((_n) * 0x40))
+ 
+ #define E1000_I210_TQAVCTRL   0x3570
+ #define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40))
+ #define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40))
+ 
   #define E1000_INVM_DATA_REG(_n)       (0x12120 + 4*(_n))
   #define E1000_INVM_SIZE               64 /* Number of INVM Data Registers */
   
diff --combined drivers/net/ethernet/intel/igb/igb_main.c

index 18b6c25d4705b9ca12918c5369a7a9e02cf35634,43cf39527660b6c6e07f200131c60066b83df932..e94d3c256667637c8186fd83299b64ea2de53c72
--- 1/drivers/net/ethernet/intel/igb/igb_main.c
--- 2/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@@ -34,6 -34,7 +34,7 @@@
   #include <linux/slab.h>
   #include <net/checksum.h>
   #include <net/ip6_checksum.h>
+ #include <net/pkt_sched.h>
   #include <linux/net_tstamp.h>
   #include <linux/mii.h>
   #include <linux/ethtool.h>
@@@ -62,6 -63,17 +63,17 @@@
   #define BUILD 0
   #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
   __stringify(BUILD) "-k"
+ 
+ /* Tx queue modes accepted by set_queue_mode() */
+ enum queue_mode {
+       QUEUE_MODE_STRICT_PRIORITY,     /* E1000_TQAVCC_QUEUEMODE cleared */
+       QUEUE_MODE_STREAM_RESERVATION,  /* E1000_TQAVCC_QUEUEMODE set */
+ };
+ 
+ /* Tx descriptor fetch priorities accepted by set_tx_desc_fetch_prio() */
+ enum tx_queue_prio {
+       TX_QUEUE_PRIO_HIGH,     /* E1000_TXDCTL_PRIORITY set */
+       TX_QUEUE_PRIO_LOW,      /* E1000_TXDCTL_PRIORITY cleared */
+ };
+ 
   char igb_driver_name[] = "igb";
   char igb_driver_version[] = DRV_VERSION;
   static const char igb_driver_string[] =
@@@ -133,8 -145,8 +145,8 @@@ static void igb_clean_all_rx_rings(stru
   static void igb_clean_tx_ring(struct igb_ring *);
   static void igb_clean_rx_ring(struct igb_ring *);
   static void igb_set_rx_mode(struct net_device *);
- static void igb_update_phy_info(unsigned long);
- static void igb_watchdog(unsigned long);
+ static void igb_update_phy_info(struct timer_list *);
+ static void igb_watchdog(struct timer_list *);
   static void igb_watchdog_task(struct work_struct *);
   static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
   static void igb_get_stats64(struct net_device *dev,
@@@ -750,7 -762,7 +762,7 @@@ static void igb_cache_ring_register(str
   u32 igb_rd32(struct e1000_hw *hw, u32 reg)
   {
         struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw);
- -      u8 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr);
+ +      u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
         u32 value = 0;
   
         if (E1000_REMOVED(hw_addr))
@@@ -1271,6 -1283,12 +1283,12 @@@ static int igb_alloc_q_vector(struct ig
                 ring->count = adapter->tx_ring_count;
                 ring->queue_index = txr_idx;
   
+               ring->cbs_enable = false;
+               ring->idleslope = 0;
+               ring->sendslope = 0;
+               ring->hicredit = 0;
+               ring->locredit = 0;
+ 
                 u64_stats_init(&ring->tx_syncp);
                 u64_stats_init(&ring->tx_syncp2);
   
@@@ -1598,6 -1616,284 +1616,284 @@@ static void igb_get_hw_control(struct i
                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
   }
   
+ /* Record in adapter->flags whether FQTSS mode is wanted and, when the
+  * interface is running, schedule the reset task.
+  */
+ static void enable_fqtss(struct igb_adapter *adapter, bool enable)
+ {
+       WARN_ON(adapter->hw.mac.type != e1000_i210);
+ 
+       if (!enable)
+               adapter->flags &= ~IGB_FLAG_FQTSS;
+       else
+               adapter->flags |= IGB_FLAG_FQTSS;
+ 
+       if (!netif_running(adapter->netdev))
+               return;
+ 
+       schedule_work(&adapter->reset_task);
+ }
+ 
+ /* Report whether IGB_FLAG_FQTSS is currently set in adapter->flags. */
+ static bool is_fqtss_enabled(struct igb_adapter *adapter)
+ {
+       return !!(adapter->flags & IGB_FLAG_FQTSS);
+ }
+ 
+ /* Set or clear the descriptor fetch priority bit (E1000_TXDCTL_PRIORITY)
+  * in the TXDCTL register of the given Tx queue.
+  *
+  * @hw: hardware structure; must describe an i210
+  * @queue: Tx queue number, 0..3
+  * @prio: TX_QUEUE_PRIO_HIGH sets the bit, anything else clears it
+  */
+ static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue,
+                                  enum tx_queue_prio prio)
+ {
+       u32 val;
+ 
+       WARN_ON(hw->mac.type != e1000_i210);
+       /* Valid Tx queue numbers are 0..3, so 4 is already out of range. */
+       WARN_ON(queue < 0 || queue > 3);
+ 
+       val = rd32(E1000_I210_TXDCTL(queue));
+ 
+       if (prio == TX_QUEUE_PRIO_HIGH)
+               val |= E1000_TXDCTL_PRIORITY;
+       else
+               val &= ~E1000_TXDCTL_PRIORITY;
+ 
+       wr32(E1000_I210_TXDCTL(queue), val);
+ }
+ 
+ /* Select the mode of Tx queue 0 or 1 by updating the
+  * E1000_TQAVCC_QUEUEMODE bit of its TQAVCC register: set for
+  * QUEUE_MODE_STREAM_RESERVATION, cleared for any other mode.
+  */
+ static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode)
+ {
+       u32 tqavcc;
+ 
+       WARN_ON(hw->mac.type != e1000_i210);
+       WARN_ON(queue < 0 || queue > 1);
+ 
+       tqavcc = rd32(E1000_I210_TQAVCC(queue));
+ 
+       /* Start from "bit cleared" and only raise it for stream reservation */
+       tqavcc &= ~E1000_TQAVCC_QUEUEMODE;
+       if (mode == QUEUE_MODE_STREAM_RESERVATION)
+               tqavcc |= E1000_TQAVCC_QUEUEMODE;
+ 
+       wr32(E1000_I210_TQAVCC(queue), tqavcc);
+ }
+ 
+ /**
+  *  igb_configure_cbs - Configure Credit-Based Shaper (CBS)
+  *  @adapter: pointer to adapter struct
+  *  @queue: queue number
+  *  @enable: true = enable CBS, false = disable CBS
+  *  @idleslope: idleSlope in kbps
+  *  @sendslope: sendSlope in kbps
+  *  @hicredit: hiCredit in bytes
+  *  @locredit: loCredit in bytes
+  *
+  *  Configure CBS for a given hardware queue. When disabling, idleslope,
+  *  sendslope, hicredit, locredit arguments are ignored (they are still
+  *  printed in the debug message). @sendslope and @locredit are never
+  *  written to the hardware. This function does not return a value.
+  **/
+ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
+                             bool enable, int idleslope, int sendslope,
+                             int hicredit, int locredit)
+ {
+       struct net_device *netdev = adapter->netdev;
+       struct e1000_hw *hw = &adapter->hw;
+       u32 tqavcc;
+       u16 value;
+ 
+       WARN_ON(hw->mac.type != e1000_i210);
+       WARN_ON(queue < 0 || queue > 1);
+ 
+       if (enable) {
+               set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
+               set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
+ 
+               /* According to i210 datasheet section 7.2.7.7, we should set
+                * the 'idleSlope' field from TQAVCC register following the
+                * equation:
+                *
+                * For 100 Mbps link speed:
+                *
+                *     value = BW * 0x7735 * 0.2                          (E1)
+                *
+                * For 1000Mbps link speed:
+                *
+                *     value = BW * 0x7735 * 2                            (E2)
+                *
+                * E1 and E2 can be merged into one equation as shown below.
+                * Note that 'link-speed' is in Mbps.
+                *
+                *     value = BW * 0x7735 * 2 * link-speed
+                *                           --------------               (E3)
+                *                                1000
+                *
+                * 'BW' is the percentage bandwidth out of full link speed
+                * which can be found with the following equation. Note that
+                * idleSlope here is the parameter from this function which
+                * is in kbps.
+                *
+                *     BW =     idleSlope
+                *          -----------------                             (E4)
+                *          link-speed * 1000
+                *
+                * That said, we can come up with a generic equation to
+                * calculate the value we should set in the TQAVCC register
+                * by replacing 'BW' in E3 by E4. The resulting equation is:
+                *
+                * value =     idleSlope     * 0x7735 * 2 * link-speed
+                *         -----------------            --------------    (E5)
+                *         link-speed * 1000                 1000
+                *
+                * 'link-speed' is present in both sides of the fraction so
+                * it is canceled out. The final equation is the following
+                * (61034 is 0x7735 * 2):
+                *
+                *     value = idleSlope * 61034
+                *             -----------------                          (E6)
+                *                  1000000
+                */
+               value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
+ 
+               /* Replace only the idleSlope field, keep the other bits. */
+               tqavcc = rd32(E1000_I210_TQAVCC(queue));
+               tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+               tqavcc |= value;
+               wr32(E1000_I210_TQAVCC(queue), tqavcc);
+ 
+               wr32(E1000_I210_TQAVHC(queue), 0x80000000 + hicredit * 0x7735);
+       } else {
+               set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW);
+               set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY);
+ 
+               /* Set idleSlope to zero. */
+               tqavcc = rd32(E1000_I210_TQAVCC(queue));
+               tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+               wr32(E1000_I210_TQAVCC(queue), tqavcc);
+ 
+               /* Set hiCredit to zero. */
+               wr32(E1000_I210_TQAVHC(queue), 0);
+       }
+ 
+       /* XXX: In i210 controller the sendSlope and loCredit parameters from
+        * CBS are not configurable by software so we don't do any 'controller
+        * configuration' in respect to these parameters.
+        */
+ 
+       netdev_dbg(netdev, "CBS %s: queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+                  (enable) ? "enabled" : "disabled", queue,
+                  idleslope, sendslope, hicredit, locredit);
+ }
+ 
+ /* Store the CBS parameters for @queue on its Tx ring so that they can be
+  * read back from the ring when the hardware is configured.
+  *
+  * Returns 0 on success, or -EINVAL when @queue does not index one of the
+  * adapter's Tx rings.
+  */
+ static int igb_save_cbs_params(struct igb_adapter *adapter, int queue,
+                              bool enable, int idleslope, int sendslope,
+                              int hicredit, int locredit)
+ {
+       struct igb_ring *ring;
+ 
+       /* Valid ring indices are 0 .. num_tx_queues - 1; queue equal to
+        * num_tx_queues would index past the last Tx ring.
+        */
+       if (queue < 0 || queue >= adapter->num_tx_queues)
+               return -EINVAL;
+ 
+       ring = adapter->tx_ring[queue];
+ 
+       ring->cbs_enable = enable;
+       ring->idleslope = idleslope;
+       ring->sendslope = sendslope;
+       ring->hicredit = hicredit;
+       ring->locredit = locredit;
+ 
+       return 0;
+ }
+ 
+ /* Return true as soon as one Tx ring has cbs_enable set, false if none do. */
+ static bool is_any_cbs_enabled(struct igb_adapter *adapter)
+ {
+       int q;
+ 
+       for (q = 0; q < adapter->num_tx_queues; q++)
+               if (adapter->tx_ring[q]->cbs_enable)
+                       return true;
+ 
+       return false;
+ }
+ 
+ /* Program the i210 transmission mode according to the FQTSS flag: either
+  * switch to 'Qav' mode and apply the per-ring CBS settings, or restore the
+  * default packet buffer sizes and leave 'Qav' mode. Does nothing on any
+  * other MAC type.
+  */
+ static void igb_setup_tx_mode(struct igb_adapter *adapter)
+ {
+       struct net_device *netdev = adapter->netdev;
+       struct e1000_hw *hw = &adapter->hw;
+       u32 val;
+ 
+       /* Only i210 controller supports changing the transmission mode. */
+       if (hw->mac.type != e1000_i210)
+               return;
+ 
+       if (is_fqtss_enabled(adapter)) {
+               int i, max_queue;
+ 
+               /* Configure TQAVCTRL register: set transmit mode to 'Qav',
+                * set data fetch arbitration to 'round robin' and set data
+                * transfer arbitration to 'credit shaper algorithm'.
+                */
+               val = rd32(E1000_I210_TQAVCTRL);
+               val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_DATATRANARB;
+               val &= ~E1000_TQAVCTRL_DATAFETCHARB;
+               wr32(E1000_I210_TQAVCTRL, val);
+ 
+               /* Configure Tx and Rx packet buffers sizes as described in
+                * i210 datasheet section 7.2.7.7.
+                */
+               val = rd32(E1000_TXPBS);
+               val &= ~I210_TXPBSIZE_MASK;
+               val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB |
+                       I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB;
+               wr32(E1000_TXPBS, val);
+ 
+               val = rd32(E1000_RXPBS);
+               val &= ~I210_RXPBSIZE_MASK;
+               val |= I210_RXPBSIZE_PB_32KB;
+               wr32(E1000_RXPBS, val);
+ 
+               /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ
+                * register should not exceed the buffer size programmed in
+                * TXPBS. The smallest buffer size programmed in TXPBS is 4kB
+                * so according to the datasheet we should set MAX_TPKT_SIZE to
+                * 4kB / 64.
+                *
+                * However, when we do so, no frames from queues 2 and 3 are
+                * transmitted.  It seems the MAX_TPKT_SIZE should not be
+                * greater than or _equal_ to the buffer size programmed in
+                * TXPBS. For this reason, we set MAX_TPKT_SIZE to
+                * (4kB - 1) / 64.
+                */
+               val = (4096 - 1) / 64;
+               wr32(E1000_I210_DTXMXPKTSZ, val);
+ 
+               /* Since FQTSS mode is enabled, apply any CBS configuration
+                * previously set. If no previous CBS configuration has been
+                * done, then the initial configuration is applied, which means
+                * CBS is disabled.
+                */
+               max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ?
+                           adapter->num_tx_queues : I210_SR_QUEUES_NUM;
+ 
+               for (i = 0; i < max_queue; i++) {
+                       struct igb_ring *ring = adapter->tx_ring[i];
+ 
+                       igb_configure_cbs(adapter, i, ring->cbs_enable,
+                                         ring->idleslope, ring->sendslope,
+                                         ring->hicredit, ring->locredit);
+               }
+       } else {
+               wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
+               wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
+               wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT);
+ 
+               val = rd32(E1000_I210_TQAVCTRL);
+               /* According to Section 8.12.21, the other flags we've set when
+                * enabling FQTSS are not relevant when disabling FQTSS so we
+                * don't touch them here.
+                */
+               val &= ~E1000_TQAVCTRL_XMIT_MODE;
+               wr32(E1000_I210_TQAVCTRL, val);
+       }
+ 
+       netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ?
+                  "enabled" : "disabled");
+ }
+ 
   /**
    *  igb_configure - configure the hardware for RX and TX
    *  @adapter: private board structure
@@@ -1609,6 -1905,7 +1905,7 @@@ static void igb_configure(struct igb_ad
   
         igb_get_hw_control(adapter);
         igb_set_rx_mode(netdev);
+       igb_setup_tx_mode(adapter);
   
         igb_restore_vlan(adapter);
   
@@@ -2150,6 -2447,55 +2447,55 @@@ igb_features_check(struct sk_buff *skb
         return features;
   }
   
+ /* Handle a CBS qdisc offload request: validate it, store the parameters on
+  * the Tx ring, then either program the queue (FQTSS already enabled) or
+  * enable FQTSS.
+  *
+  * Returns 0 on success, -EOPNOTSUPP on a non-i210 MAC, -EINVAL for a queue
+  * other than 0 or 1, or the error from igb_save_cbs_params().
+  */
+ static int igb_offload_cbs(struct igb_adapter *adapter,
+                          struct tc_cbs_qopt_offload *qopt)
+ {
+       int ret;
+ 
+       /* Only the i210 controller can offload CBS. */
+       if (adapter->hw.mac.type != e1000_i210)
+               return -EOPNOTSUPP;
+ 
+       /* Only queue 0 and queue 1 can offload CBS. */
+       if (qopt->queue < 0 || qopt->queue > 1)
+               return -EINVAL;
+ 
+       ret = igb_save_cbs_params(adapter, qopt->queue, qopt->enable,
+                                 qopt->idleslope, qopt->sendslope,
+                                 qopt->hicredit, qopt->locredit);
+       if (ret)
+               return ret;
+ 
+       /* FQTSS is off: just turn it on and stop here. */
+       if (!is_fqtss_enabled(adapter)) {
+               enable_fqtss(adapter, true);
+               return 0;
+       }
+ 
+       igb_configure_cbs(adapter, qopt->queue, qopt->enable,
+                         qopt->idleslope, qopt->sendslope,
+                         qopt->hicredit, qopt->locredit);
+ 
+       /* No ring has CBS enabled any more, so leave FQTSS mode. */
+       if (!is_any_cbs_enabled(adapter))
+               enable_fqtss(adapter, false);
+ 
+       return 0;
+ }
+ 
+ /* ndo_setup_tc handler: only TC_SETUP_QDISC_CBS is handled (forwarded to
+  * igb_offload_cbs()); every other type yields -EOPNOTSUPP.
+  */
+ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+                       void *type_data)
+ {
+       if (type != TC_SETUP_QDISC_CBS)
+               return -EOPNOTSUPP;
+ 
+       return igb_offload_cbs(netdev_priv(dev), type_data);
+ }
+ 
   static const struct net_device_ops igb_netdev_ops = {
         .ndo_open               = igb_open,
         .ndo_stop               = igb_close,
@@@ -2175,6 -2521,7 +2521,7 @@@
         .ndo_set_features       = igb_set_features,
         .ndo_fdb_add            = igb_ndo_fdb_add,
         .ndo_features_check     = igb_features_check,
+       .ndo_setup_tc           = igb_setup_tc,
   };
   
   /**
@@@ -2538,10 -2885,8 +2885,8 @@@ static int igb_probe(struct pci_dev *pd
                 wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
         }
   
-       setup_timer(&adapter->watchdog_timer, igb_watchdog,
-                   (unsigned long) adapter);
-       setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
-                   (unsigned long) adapter);
+       timer_setup(&adapter->watchdog_timer, igb_watchdog, 0);
+       timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0);
   
         INIT_WORK(&adapter->reset_task, igb_reset_task);
         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
@@@ -3162,6 -3507,8 +3507,8 @@@ static int igb_sw_init(struct igb_adapt
         /* Setup and initialize a copy of the hw vlan table array */
         adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
                                        GFP_ATOMIC);
+       if (!adapter->shadow_vfta)
+               return -ENOMEM;
   
         /* This call may decrease the number of queues */
         if (igb_init_interrupt_scheme(adapter, true)) {
@@@ -4423,9 -4770,9 +4770,9 @@@ static void igb_spoof_check(struct igb_
   /* Need to wait a few seconds after link up to get diagnostic information from
    * the phy
    */
- static void igb_update_phy_info(unsigned long data)
+ static void igb_update_phy_info(struct timer_list *t)
   {
-       struct igb_adapter *adapter = (struct igb_adapter *) data;
+       struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
         igb_get_phy_info(&adapter->hw);
   }
   
@@@ -4512,9 -4859,9 +4859,9 @@@ static void igb_check_lvmmc(struct igb_
    *  igb_watchdog - Timer Call-back
    *  @data: pointer to adapter cast into an unsigned long
    **/
- static void igb_watchdog(unsigned long data)
+ static void igb_watchdog(struct timer_list *t)
   {
-       struct igb_adapter *adapter = (struct igb_adapter *)data;
+       struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
         /* Do the rest outside of interrupt context */
         schedule_work(&adapter->watchdog_task);
   }
diff --combined drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

index 935a2f15b0b00e72763214aad0248a504f5b3a45,6eaca8366ac88c4cb0d271413d2f33d6bf497bde..ca06c3cc2ca841fc395c957efe64cf717b36670f
--- 1/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
--- 2/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@@ -380,7 -380,7 +380,7 @@@ static void ixgbe_check_remove(struct i
    */
   u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
   {
- -      u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr);
+ +      u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
         u32 value;
   
         if (ixgbe_removed(reg_addr))
@@@ -1620,6 -1620,7 +1620,7 @@@ static bool ixgbe_alloc_mapped_page(str
         bi->page = page;
         bi->page_offset = ixgbe_rx_offset(rx_ring);
         bi->pagecnt_bias = 1;
+       rx_ring->rx_stats.alloc_rx_page++;
   
         return true;
   }
@@@ -2133,6 -2134,21 +2134,21 @@@ static struct sk_buff *ixgbe_construct_
   #if L1_CACHE_BYTES < 128
         prefetch(xdp->data + L1_CACHE_BYTES);
   #endif
+       /* Note, we get here by enabling legacy-rx via:
+        *
+        *    ethtool --set-priv-flags <dev> legacy-rx on
+        *
+        * In this mode, we currently get 0 extra XDP headroom as
+        * opposed to having legacy-rx off, where we process XDP
+        * packets going to stack via ixgbe_build_skb(). The latter
+        * provides us currently with 192 bytes of headroom.
+        *
+        * For ixgbe_construct_skb() mode it means that the
+        * xdp->data_meta will always point to xdp->data, since
+        * the helper cannot expand the head. Should this ever
+        * change in future for legacy-rx mode on, then let's also
+        * add xdp->data_meta handling here.
+        */
   
         /* allocate a skb to store the frags */
         skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE);
@@@ -2165,6 -2181,7 +2181,7 @@@ static struct sk_buff *ixgbe_build_skb(
                                        struct xdp_buff *xdp,
                                        union ixgbe_adv_rx_desc *rx_desc)
   {
+       unsigned int metasize = xdp->data - xdp->data_meta;
   #if (PAGE_SIZE < 8192)
         unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
   #else
@@@ -2174,10 -2191,14 +2191,14 @@@
   #endif
         struct sk_buff *skb;
   
-       /* prefetch first cache line of first page */
-       prefetch(xdp->data);
+       /* Prefetch first cache line of first page. If xdp->data_meta
+        * is unused, this points exactly to xdp->data, otherwise we
+        * likely have a consumer accessing first few bytes of meta
+        * data, and then actual data.
+        */
+       prefetch(xdp->data_meta);
   #if L1_CACHE_BYTES < 128
-       prefetch(xdp->data + L1_CACHE_BYTES);
+       prefetch(xdp->data_meta + L1_CACHE_BYTES);
   #endif
   
         /* build an skb to around the page buffer */
@@@ -2188,6 -2209,8 +2209,8 @@@
         /* update pointers within the skb to store the data */
         skb_reserve(skb, xdp->data - xdp->data_hard_start);
         __skb_put(skb, xdp->data_end - xdp->data);
+       if (metasize)
+               skb_metadata_set(skb, metasize);
   
         /* record DMA address if this is the start of a chain of buffers */
         if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
@@@ -2326,6 -2349,7 +2349,7 @@@ static int ixgbe_clean_rx_irq(struct ix
                 if (!skb) {
                         xdp.data = page_address(rx_buffer->page) +
                                    rx_buffer->page_offset;
+                       xdp.data_meta = xdp.data;
                         xdp.data_hard_start = xdp.data -
                                               ixgbe_rx_offset(rx_ring);
                         xdp.data_end = xdp.data + size;
@@@ -2516,50 -2540,174 +2540,174 @@@ enum latency_range 
   static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
                              struct ixgbe_ring_container *ring_container)
   {
-       int bytes = ring_container->total_bytes;
-       int packets = ring_container->total_packets;
-       u32 timepassed_us;
-       u64 bytes_perint;
-       u8 itr_setting = ring_container->itr;
+       unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
+                          IXGBE_ITR_ADAPTIVE_LATENCY;
+       unsigned int avg_wire_size, packets, bytes;
+       unsigned long next_update = jiffies;
   
-       if (packets == 0)
+       /* If we don't have any rings just leave ourselves set for maximum
+        * possible latency so we take ourselves out of the equation.
+        */
+       if (!ring_container->ring)
                 return;
   
-       /* simple throttlerate management
-        *   0-10MB/s   lowest (100000 ints/s)
-        *  10-20MB/s   low    (20000 ints/s)
-        *  20-1249MB/s bulk   (12000 ints/s)
+       /* If we didn't update within up to 1 - 2 jiffies we can assume
+        * that either packets are coming in so slow there hasn't been
+        * any work, or that there is so much work that NAPI is dealing
+        * with interrupt moderation and we don't need to do anything.
          */
-       /* what was last interrupt timeslice? */
-       timepassed_us = q_vector->itr >> 2;
-       if (timepassed_us == 0)
-               return;
+       if (time_after(next_update, ring_container->next_update))
+               goto clear_counts;
   
-       bytes_perint = bytes / timepassed_us; /* bytes/usec */
+       packets = ring_container->total_packets;
   
-       switch (itr_setting) {
-       case lowest_latency:
-               if (bytes_perint > 10)
-                       itr_setting = low_latency;
-               break;
-       case low_latency:
-               if (bytes_perint > 20)
-                       itr_setting = bulk_latency;
-               else if (bytes_perint <= 10)
-                       itr_setting = lowest_latency;
+       /* We have no packets to actually measure against. This means
+        * either one of the other queues on this vector is active or
+        * we are a Tx queue doing TSO with too high of an interrupt rate.
+        *
+        * When this occurs just tick up our delay by the minimum value
+        * and hope that this extra delay will prevent us from being called
+        * without any work on our queue.
+        */
+       if (!packets) {
+               itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+               if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+                       itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+               itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
+               goto clear_counts;
+       }
+ 
+       bytes = ring_container->total_bytes;
+ 
+       /* If packets are less than 4 and bytes are less than 9000 assume
+        * insufficient data to use bulk rate limiting approach. We are
+        * likely latency driven.
+        */
+       if (packets < 4 && bytes < 9000) {
+               itr = IXGBE_ITR_ADAPTIVE_LATENCY;
+               goto adjust_by_size;
+       }
+ 
+       /* Between 4 and 48 we can assume that our current interrupt delay
+        * is only slightly too low. As such we should increase it by a small
+        * fixed amount.
+        */
+       if (packets < 48) {
+               itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+               if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+                       itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+               goto clear_counts;
+       }
+ 
+       /* Between 48 and 96 is our "goldilocks" zone where we are working
+        * out "just right". Just report that our current ITR is good for us.
+        */
+       if (packets < 96) {
+               itr = q_vector->itr >> 2;
+               goto clear_counts;
+       }
+ 
+       /* If packet count is 96 or greater we are likely looking at a slight
+        * overrun of the delay we want. Try halving our delay to see if that
+        * will cut the number of packets in half per interrupt.
+        */
+       if (packets < 256) {
+               itr = q_vector->itr >> 3;
+               if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
+                       itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
+               goto clear_counts;
+       }
+ 
+       /* The paths below assume we are dealing with a bulk ITR since number
+        * of packets is 256 or greater. We are just going to have to compute
+        * a value and try to bring the count under control, though for smaller
+        * packet sizes there isn't much we can do as NAPI polling will likely
+        * be kicking in sooner rather than later.
+        */
+       itr = IXGBE_ITR_ADAPTIVE_BULK;
+ 
+ adjust_by_size:
+       /* If packet counts are 256 or greater we can assume we have a gross
+        * overestimation of what the rate should be. Instead of trying to fine
+        * tune it just use the formula below to try and dial in an exact value
+        * given the current packet size of the frame.
+        */
+       avg_wire_size = bytes / packets;
+ 
+       /* The following is a crude approximation of:
+        *  wmem_default / (size + overhead) = desired_pkts_per_int
+        *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+        *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+        *
+        * Assuming wmem_default is 212992 and overhead is 640 bytes per
+        * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+        * formula down to
+        *
+        *  (170 * (size + 24)) / (size + 640) = ITR
+        *
+        * We first do some math on the packet size and then finally bitshift
+        * by 8 after rounding up. We also have to account for PCIe link speed
+        * difference as ITR scales based on this.
+        */
+       if (avg_wire_size <= 60) {
+               /* Start at 50k ints/sec */
+               avg_wire_size = 5120;
+       } else if (avg_wire_size <= 316) {
+               /* 50K ints/sec to 16K ints/sec */
+               avg_wire_size *= 40;
+               avg_wire_size += 2720;
+       } else if (avg_wire_size <= 1084) {
+               /* 16K ints/sec to 9.2K ints/sec */
+               avg_wire_size *= 15;
+               avg_wire_size += 11452;
+       } else if (avg_wire_size <= 1980) {
+               /* 9.2K ints/sec to 8K ints/sec */
+               avg_wire_size *= 5;
+               avg_wire_size += 22420;
+       } else {
+               /* plateau at a limit of 8K ints/sec */
+               avg_wire_size = 32256;
+       }
+ 
+       /* If we are in low latency mode halve our delay, which doubles the rate
+        * to somewhere between 100K and 16K ints/sec
+        */
+       if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
+               avg_wire_size >>= 1;
+ 
+       /* Resultant value is 256 times larger than it needs to be. This
+        * gives us room to adjust the value as needed to either increase
+        * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+        *
+        * Use addition as we have already recorded the new latency flag
+        * for the ITR value.
+        */
+       switch (q_vector->adapter->link_speed) {
+       case IXGBE_LINK_SPEED_10GB_FULL:
+       case IXGBE_LINK_SPEED_100_FULL:
+       default:
+               itr += DIV_ROUND_UP(avg_wire_size,
+                                   IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
+                      IXGBE_ITR_ADAPTIVE_MIN_INC;
                 break;
-       case bulk_latency:
-               if (bytes_perint <= 20)
-                       itr_setting = low_latency;
+       case IXGBE_LINK_SPEED_2_5GB_FULL:
+       case IXGBE_LINK_SPEED_1GB_FULL:
+       case IXGBE_LINK_SPEED_10_FULL:
+               itr += DIV_ROUND_UP(avg_wire_size,
+                                   IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
+                      IXGBE_ITR_ADAPTIVE_MIN_INC;
                 break;
         }
   
-       /* clear work counters since we have the values we need */
+ clear_counts:
+       /* write back value */
+       ring_container->itr = itr;
+ 
+       /* next update should occur within next jiffy */
+       ring_container->next_update = next_update + 1;
+ 
         ring_container->total_bytes = 0;
         ring_container->total_packets = 0;
- 
-       /* write updated itr to ring container */
-       ring_container->itr = itr_setting;
   }
   
   /**
@@@ -2601,34 -2749,19 +2749,19 @@@ void ixgbe_write_eitr(struct ixgbe_q_ve
   
   static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
   {
-       u32 new_itr = q_vector->itr;
-       u8 current_itr;
+       u32 new_itr;
   
         ixgbe_update_itr(q_vector, &q_vector->tx);
         ixgbe_update_itr(q_vector, &q_vector->rx);
   
-       current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+       /* use the smallest value of new ITR delay calculations */
+       new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
   
-       switch (current_itr) {
-       /* counts and packets in update_itr are dependent on these numbers */
-       case lowest_latency:
-               new_itr = IXGBE_100K_ITR;
-               break;
-       case low_latency:
-               new_itr = IXGBE_20K_ITR;
-               break;
-       case bulk_latency:
-               new_itr = IXGBE_12K_ITR;
-               break;
-       default:
-               break;
-       }
+       /* Clear latency flag if set, shift into correct position */
+       new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
+       new_itr <<= 2;
   
         if (new_itr != q_vector->itr) {
-               /* do an exponential smoothing */
-               new_itr = (10 * new_itr * q_vector->itr) /
-                         ((9 * new_itr) + q_vector->itr);
- 
                 /* save the algorithm value here */
                 q_vector->itr = new_itr;
   
@@@ -6771,6 -6904,7 +6904,7 @@@ void ixgbe_update_stats(struct ixgbe_ad
         u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
         u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
         u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+       u64 alloc_rx_page = 0;
         u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
   
         if (test_bit(__IXGBE_DOWN, &adapter->state) ||
@@@ -6791,6 -6925,7 +6925,7 @@@
         for (i = 0; i < adapter->num_rx_queues; i++) {
                 struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
                 non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+               alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
                 alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
                 alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
                 hw_csum_rx_error += rx_ring->rx_stats.csum_err;
@@@ -6798,6 -6933,7 +6933,7 @@@
                 packets += rx_ring->stats.packets;
         }
         adapter->non_eop_descs = non_eop_descs;
+       adapter->alloc_rx_page = alloc_rx_page;
         adapter->alloc_rx_page_failed = alloc_rx_page_failed;
         adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
         adapter->hw_csum_rx_error = hw_csum_rx_error;
@@@ -7554,9 -7690,9 +7690,9 @@@ static void ixgbe_sfp_link_config_subta
    * ixgbe_service_timer - Timer Call-back
    * @data: pointer to adapter cast into an unsigned long
    **/
- static void ixgbe_service_timer(unsigned long data)
+ static void ixgbe_service_timer(struct timer_list *t)
   {
-       struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data;
+       struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer);
         unsigned long next_event_offset;
   
         /* poll faster when waiting for link */
@@@ -8624,7 -8760,7 +8760,7 @@@ static void ixgbe_get_stats64(struct ne
   
         rcu_read_lock();
         for (i = 0; i < adapter->num_rx_queues; i++) {
- -              struct ixgbe_ring *ring = ACCESS_ONCE(adapter->rx_ring[i]);
+ +              struct ixgbe_ring *ring = READ_ONCE(adapter->rx_ring[i]);
                 u64 bytes, packets;
                 unsigned int start;
   
@@@ -8640,12 -8776,12 +8776,12 @@@
         }
   
         for (i = 0; i < adapter->num_tx_queues; i++) {
- -              struct ixgbe_ring *ring = ACCESS_ONCE(adapter->tx_ring[i]);
+ +              struct ixgbe_ring *ring = READ_ONCE(adapter->tx_ring[i]);
   
                 ixgbe_get_ring_stats64(stats, ring);
         }
         for (i = 0; i < adapter->num_xdp_queues; i++) {
- -              struct ixgbe_ring *ring = ACCESS_ONCE(adapter->xdp_ring[i]);
+ +              struct ixgbe_ring *ring = READ_ONCE(adapter->xdp_ring[i]);
   
                 ixgbe_get_ring_stats64(stats, ring);
         }
@@@ -9223,13 -9359,10 +9359,10 @@@ free_jump
         return err;
   }
   
- static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
+ static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
                                   struct tc_cls_u32_offload *cls_u32)
   {
-       struct ixgbe_adapter *adapter = netdev_priv(dev);
- 
-       if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
-           cls_u32->common.chain_index)
+       if (cls_u32->common.chain_index)
                 return -EOPNOTSUPP;
   
         switch (cls_u32->command) {
@@@ -9248,6 -9381,43 +9381,43 @@@
         }
   }
   
+ static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+                                  void *cb_priv)
+ {
+       struct ixgbe_adapter *adapter = cb_priv;
+ 
+       if (!tc_can_offload(adapter->netdev))
+               return -EOPNOTSUPP;
+ 
+       switch (type) {
+       case TC_SETUP_CLSU32:
+               return ixgbe_setup_tc_cls_u32(adapter, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+ }
+ 
+ static int ixgbe_setup_tc_block(struct net_device *dev,
+                               struct tc_block_offload *f)
+ {
+       struct ixgbe_adapter *adapter = netdev_priv(dev);
+ 
+       if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+ 
+       switch (f->command) {
+       case TC_BLOCK_BIND:
+               return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
+                                            adapter, adapter);
+       case TC_BLOCK_UNBIND:
+               tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
+                                       adapter);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+ }
+ 
   static int ixgbe_setup_tc_mqprio(struct net_device *dev,
                                  struct tc_mqprio_qopt *mqprio)
   {
@@@ -9259,9 -9429,9 +9429,9 @@@ static int __ixgbe_setup_tc(struct net_
                             void *type_data)
   {
         switch (type) {
-       case TC_SETUP_CLSU32:
-               return ixgbe_setup_tc_cls_u32(dev, type_data);
-       case TC_SETUP_MQPRIO:
+       case TC_SETUP_BLOCK:
+               return ixgbe_setup_tc_block(dev, type_data);
+       case TC_SETUP_QDISC_MQPRIO:
                 return ixgbe_setup_tc_mqprio(dev, type_data);
         default:
                 return -EOPNOTSUPP;
@@@ -9733,6 -9903,17 +9903,17 @@@ static void ixgbe_fwd_del(struct net_de
         limit = find_last_bit(&adapter->fwd_bitmask, 32);
         adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
         ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter);
+ 
+       /* go back to full RSS if we're done with our VMQs */
+       if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+               int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+                               num_online_cpus());
+ 
+               adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
+               adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
+               adapter->ring_feature[RING_F_RSS].limit = rss;
+       }
+ 
         ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev));
         netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n",
                    fwd_adapter->pool, adapter->num_rx_pools,
@@@ -9823,7 -10004,7 +10004,7 @@@ static int ixgbe_xdp_setup(struct net_d
         return 0;
   }
   
- static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
   {
         struct ixgbe_adapter *adapter = netdev_priv(dev);
   
@@@ -9932,7 -10113,7 +10113,7 @@@ static const struct net_device_ops ixgb
         .ndo_udp_tunnel_add     = ixgbe_add_udp_tunnel_port,
         .ndo_udp_tunnel_del     = ixgbe_del_udp_tunnel_port,
         .ndo_features_check     = ixgbe_features_check,
-       .ndo_xdp                = ixgbe_xdp,
+       .ndo_bpf                = ixgbe_xdp,
         .ndo_xdp_xmit           = ixgbe_xdp_xmit,
         .ndo_xdp_flush          = ixgbe_xdp_flush,
   };
@@@ -10355,8 -10536,7 +10536,7 @@@ skip_sriov
         ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
         ixgbe_mac_set_default_filter(adapter);
   
-       setup_timer(&adapter->service_timer, &ixgbe_service_timer,
-                   (unsigned long) adapter);
+       timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
   
         if (ixgbe_removed(hw->hw_addr)) {
                 err = -EIO;
@@@ -10711,6 -10891,9 +10891,9 @@@ skip_bad_vf_detection
   #endif /* CONFIG_PCI_IOV */
         if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
                 return PCI_ERS_RESULT_DISCONNECT;
+ 
+       if (!netif_device_present(netdev))
+               return PCI_ERS_RESULT_DISCONNECT;
   
         rtnl_lock();
         netif_device_detach(netdev);
diff --combined drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c

index cacb30682434b8685a0102fa51b883fe08caf547,12d3601b1d57f9fa977acd54c646e81c3d1b9048..feed11bc9ddffdf7b779abab19a7e3e678909ecd
--- 1/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
--- 2/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@@ -164,7 -164,7 +164,7 @@@ static void ixgbevf_check_remove(struc
   
   u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg)
   {
- -      u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr);
+ +      u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
         u32 value;
   
         if (IXGBE_REMOVED(reg_addr))
@@@ -2747,9 -2747,10 +2747,10 @@@ void ixgbevf_update_stats(struct ixgbev
    * ixgbevf_service_timer - Timer Call-back
    * @data: pointer to adapter cast into an unsigned long
    **/
- static void ixgbevf_service_timer(unsigned long data)
+ static void ixgbevf_service_timer(struct timer_list *t)
   {
-       struct ixgbevf_adapter *adapter = (struct ixgbevf_adapter *)data;
+       struct ixgbevf_adapter *adapter = from_timer(adapter, t,
+                                                    service_timer);
   
         /* Reset the timer */
         mod_timer(&adapter->service_timer, (HZ * 2) + jiffies);
@@@ -4120,8 -4121,7 +4121,7 @@@ static int ixgbevf_probe(struct pci_de
                 goto err_sw_init;
         }
   
-       setup_timer(&adapter->service_timer, &ixgbevf_service_timer,
-                   (unsigned long)adapter);
+       timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0);
   
         INIT_WORK(&adapter->service_task, ixgbevf_service_task);
         set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state);
diff --combined drivers/net/ethernet/mellanox/mlx4/en_tx.c

index 3541a7f9d12e5a06f924f80d0fee20c355d1204a,596445a4a241419737596b51fb38d00b02ad6681..6b68537738480eb649b962647fe8b52c65f4cb32
--- 1/drivers/net/ethernet/mellanox/mlx4/en_tx.c
--- 2/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@@ -414,8 -414,8 +414,8 @@@ bool mlx4_en_process_tx_cq(struct net_d
   
         index = cons_index & size_mask;
         cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
- -      last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb);
- -      ring_cons = ACCESS_ONCE(ring->cons);
+ +      last_nr_txbb = READ_ONCE(ring->last_nr_txbb);
+ +      ring_cons = READ_ONCE(ring->cons);
         ring_index = ring_cons & size_mask;
         stamp_index = ring_index;
   
@@@ -479,8 -479,8 +479,8 @@@
         wmb();
   
         /* we want to dirty this cache line once */
- -      ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
- -      ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
+ +      WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb);
+ +      WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
   
         if (cq->type == TX_XDP)
                 return done < budget;
@@@ -718,7 -718,7 +718,7 @@@ void mlx4_en_xmit_doorbell(struct mlx4_
   #else
         iowrite32be(
   #endif
-                 ring->doorbell_qpn,
+                 (__force u32)ring->doorbell_qpn,
                   ring->bf.uar->map + MLX4_SEND_DOORBELL);
   }
   
@@@ -858,7 -858,7 +858,7 @@@ netdev_tx_t mlx4_en_xmit(struct sk_buf
                 goto tx_drop;
   
         /* fetch ring->cons far ahead before needing it to avoid stall */
- -      ring_cons = ACCESS_ONCE(ring->cons);
+ +      ring_cons = READ_ONCE(ring->cons);
   
         real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
                                   &inline_ok, &fragptr);
@@@ -1066,7 -1066,7 +1066,7 @@@
                  */
                 smp_rmb();
   
- -              ring_cons = ACCESS_ONCE(ring->cons);
+ +              ring_cons = READ_ONCE(ring->cons);
                 if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
                         netif_tx_wake_queue(ring->tx_queue);
                         ring->wake_queue++;
@@@ -1085,13 -1085,35 +1085,35 @@@ tx_drop
   #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
                                  / 16) & 0x3f)
   
+ void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+                                   struct mlx4_en_tx_ring *ring)
+ {
+       int i;
+ 
+       for (i = 0; i < ring->size; i++) {
+               struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
+               struct mlx4_en_tx_desc *tx_desc = ring->buf +
+                       (i << LOG_TXBB_SIZE);
+ 
+               tx_info->map0_byte_count = PAGE_SIZE;
+               tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
+               tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
+               tx_info->ts_requested = 0;
+               tx_info->nr_maps = 1;
+               tx_info->linear = 1;
+               tx_info->inl = 0;
+ 
+               tx_desc->data.lkey = ring->mr_key;
+               tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+               tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+       }
+ }
+ 
   netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
                                struct mlx4_en_rx_alloc *frame,
-                              struct net_device *dev, unsigned int length,
+                              struct mlx4_en_priv *priv, unsigned int length,
                                int tx_ind, bool *doorbell_pending)
   {
-       struct mlx4_en_priv *priv = netdev_priv(dev);
-       union mlx4_wqe_qpn_vlan qpn_vlan = {};
         struct mlx4_en_tx_desc *tx_desc;
         struct mlx4_en_tx_info *tx_info;
         struct mlx4_wqe_data_seg *data;
@@@ -1123,25 -1145,16 +1145,16 @@@
         tx_info->page = frame->page;
         frame->page = NULL;
         tx_info->map0_dma = dma;
-       tx_info->map0_byte_count = PAGE_SIZE;
-       tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
         tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
-       tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
-       tx_info->ts_requested = 0;
-       tx_info->nr_maps = 1;
-       tx_info->linear = 1;
-       tx_info->inl = 0;
   
         dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
                                          length, PCI_DMA_TODEVICE);
   
         data->addr = cpu_to_be64(dma + frame->page_offset);
-       data->lkey = ring->mr_key;
         dma_wmb();
         data->byte_count = cpu_to_be32(length);
   
         /* tx completion can avoid cache line miss for common cases */
-       tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
   
         op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
                 ((ring->prod & ring->size) ?
@@@ -1152,10 -1165,13 +1165,13 @@@
   
         ring->prod += MLX4_EN_XDP_TX_NRTXBB;
   
-       qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+       /* Ensure new descriptor hits memory
+        * before setting ownership of this descriptor to HW
+        */
+       dma_wmb();
+       tx_desc->ctrl.owner_opcode = op_own;
+       ring->xmit_more++;
   
-       mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
-                             op_own, false, false);
         *doorbell_pending = true;
   
         return NETDEV_TX_OK;
diff --combined drivers/net/ethernet/neterion/vxge/vxge-main.c

index 5dd5f61e1114bd1f7870fd1b34ed32d002a4d427,426c9a946eb4f9f372ca138c640351c78be9016d..fe7e0e1dd01def224bd1932483170e04d488ff72
--- 1/drivers/net/ethernet/neterion/vxge/vxge-main.c
--- 2/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@@ -1122,7 -1122,6 +1122,6 @@@ static void vxge_set_multicast(struct n
         struct netdev_hw_addr *ha;
         struct vxgedev *vdev;
         int i, mcast_cnt = 0;
-       struct __vxge_hw_device *hldev;
         struct vxge_vpath *vpath;
         enum vxge_hw_status status = VXGE_HW_OK;
         struct macInfo mac_info;
@@@ -1136,7 -1135,6 +1135,6 @@@
                 "%s:%d", __func__, __LINE__);
   
         vdev = netdev_priv(dev);
-       hldev = vdev->devh;
   
         if (unlikely(!is_vxge_card_up(vdev)))
                 return;
@@@ -1283,7 -1281,6 +1281,6 @@@ static int vxge_set_mac_addr(struct net
   {
         struct sockaddr *addr = p;
         struct vxgedev *vdev;
-       struct __vxge_hw_device *hldev;
         enum vxge_hw_status status = VXGE_HW_OK;
         struct macInfo mac_info_new, mac_info_old;
         int vpath_idx = 0;
@@@ -1291,7 -1288,6 +1288,6 @@@
         vxge_debug_entryexit(VXGE_TRACE, "%s:%d", __func__, __LINE__);
   
         vdev = netdev_priv(dev);
-       hldev = vdev->devh;
   
         if (!is_valid_ether_addr(addr->sa_data))
                 return -EINVAL;
@@@ -2177,7 -2173,6 +2173,6 @@@ static void adaptive_coalesce_rx_interr
    */
   static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
   {
-       struct net_device *dev;
         struct __vxge_hw_device *hldev;
         u64 reason;
         enum vxge_hw_status status;
@@@ -2185,7 -2180,6 +2180,6 @@@
   
         vxge_debug_intr(VXGE_TRACE, "%s:%d", __func__, __LINE__);
   
-       dev = vdev->ndev;
         hldev = pci_get_drvdata(vdev->pdev);
   
         if (pci_channel_offline(vdev->pdev))
@@@ -2597,9 -2591,9 +2591,9 @@@ INTA_MODE
         return VXGE_HW_OK;
   }
   
- static void vxge_poll_vp_reset(unsigned long data)
+ static void vxge_poll_vp_reset(struct timer_list *t)
   {
-       struct vxgedev *vdev = (struct vxgedev *)data;
+       struct vxgedev *vdev = from_timer(vdev, t, vp_reset_timer);
         int i, j = 0;
   
         for (i = 0; i < vdev->no_of_vpath; i++) {
@@@ -2616,9 -2610,9 +2610,9 @@@
         mod_timer(&vdev->vp_reset_timer, jiffies + HZ / 2);
   }
   
- static void vxge_poll_vp_lockup(unsigned long data)
+ static void vxge_poll_vp_lockup(struct timer_list *t)
   {
-       struct vxgedev *vdev = (struct vxgedev *)data;
+       struct vxgedev *vdev = from_timer(vdev, t, vp_lockup_timer);
         enum vxge_hw_status status = VXGE_HW_OK;
         struct vxge_vpath *vpath;
         struct vxge_ring *ring;
@@@ -2629,7 -2623,7 +2623,7 @@@
                 ring = &vdev->vpaths[i].ring;
   
                 /* Truncated to machine word size number of frames */
- -              rx_frms = ACCESS_ONCE(ring->stats.rx_frms);
+ +              rx_frms = READ_ONCE(ring->stats.rx_frms);
   
                 /* Did this vpath received any packets */
                 if (ring->stats.prev_rx_frms == rx_frms) {
@@@ -2713,14 -2707,13 +2707,13 @@@ static int vxge_open(struct net_device 
         struct vxge_vpath *vpath;
         int ret = 0;
         int i;
-       u64 val64, function_mode;
+       u64 val64;
   
         vxge_debug_entryexit(VXGE_TRACE,
                 "%s: %s:%d", dev->name, __func__, __LINE__);
   
         vdev = netdev_priv(dev);
         hldev = pci_get_drvdata(vdev->pdev);
-       function_mode = vdev->config.device_hw_info.function_mode;
   
         /* make sure you have link off by default every time Nic is
          * initialized */
@@@ -2858,12 -2851,12 +2851,12 @@@
                 vdev->config.rx_pause_enable);
   
         if (vdev->vp_reset_timer.function == NULL)
-               vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, vdev,
+               vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset,
                               HZ / 2);
   
         /* There is no need to check for RxD leak and RxD lookup on Titan1A */
         if (vdev->titan1 && vdev->vp_lockup_timer.function == NULL)
-               vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, vdev,
+               vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup,
                               HZ / 2);
   
         set_bit(__VXGE_STATE_CARD_UP, &vdev->state);
diff --combined drivers/net/ethernet/sfc/ef10.c

index a95a46bcd339d824f442170b47f335c630c50b6b,46d60013564c9a7d4c2038a28e02867a766a2171..e566dbb3343d7cfbd4b244fbb534c09deb3a74f4
--- 1/drivers/net/ethernet/sfc/ef10.c
--- 2/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@@ -674,6 -674,10 +674,10 @@@ static int efx_ef10_probe(struct efx_ni
         efx->rx_packet_len_offset =
                 ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE;
   
+       if (nic_data->datapath_caps &
+           (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_INCLUDE_FCS_LBN))
+               efx->net_dev->hw_features |= NETIF_F_RXFCS;
+ 
         rc = efx_mcdi_port_get_number(efx);
         if (rc < 0)
                 goto fail5;
@@@ -2073,7 -2077,7 +2077,7 @@@ static irqreturn_t efx_ef10_msi_interru
         netif_vdbg(efx, intr, efx->net_dev,
                    "IRQ %d on CPU %d\n", irq, raw_smp_processor_id());
   
- -      if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) {
+ +      if (likely(READ_ONCE(efx->irq_soft_enabled))) {
                 /* Note test interrupts */
                 if (context->index == efx->irq_level)
                         efx->last_irq_cpu = raw_smp_processor_id();
@@@ -2088,7 -2092,7 +2092,7 @@@
   static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id)
   {
         struct efx_nic *efx = dev_id;
- -      bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
+ +      bool soft_enabled = READ_ONCE(efx->irq_soft_enabled);
         struct efx_channel *channel;
         efx_dword_t reg;
         u32 queues;
@@@ -3199,11 -3203,15 +3203,15 @@@ static u16 efx_ef10_handle_rx_event_err
                                            const efx_qword_t *event)
   {
         struct efx_nic *efx = channel->efx;
+       bool handled = false;
   
         if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)) {
-               if (!efx->loopback_selftest)
-                       channel->n_rx_eth_crc_err += n_packets;
-               return EFX_RX_PKT_DISCARD;
+               if (!(efx->net_dev->features & NETIF_F_RXALL)) {
+                       if (!efx->loopback_selftest)
+                               channel->n_rx_eth_crc_err += n_packets;
+                       return EFX_RX_PKT_DISCARD;
+               }
+               handled = true;
         }
         if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR)) {
                 if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN &&
@@@ -3274,7 -3282,7 +3282,7 @@@
                 return 0;
         }
   
-       WARN_ON(1); /* No error bits were recognised */
+       WARN_ON(!handled); /* No error bits were recognised */
         return 0;
   }
   
@@@ -3291,7 -3299,7 +3299,7 @@@ static int efx_ef10_handle_rx_event(str
         bool rx_cont;
         u16 flags = 0;
   
- -      if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+ +      if (unlikely(READ_ONCE(efx->reset_pending)))
                 return 0;
   
         /* Basic packet information */
@@@ -3428,7 -3436,7 +3436,7 @@@ efx_ef10_handle_tx_event(struct efx_cha
         unsigned int tx_ev_q_label;
         int tx_descs = 0;
   
- -      if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+ +      if (unlikely(READ_ONCE(efx->reset_pending)))
                 return 0;
   
         if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT)))
@@@ -5316,7 -5324,7 +5324,7 @@@ static void efx_ef10_filter_remove_old(
         int i;
   
         for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
- -              if (ACCESS_ONCE(table->entry[i].spec) &
+ +              if (READ_ONCE(table->entry[i].spec) &
                     EFX_EF10_FILTER_FLAG_AUTO_OLD) {
                         rc = efx_ef10_filter_remove_internal(efx,
                                         1U << EFX_FILTER_PRI_AUTO, i, true);
@@@ -5726,7 -5734,7 +5734,7 @@@ static int efx_ef10_set_mac_address(str
                  * MCFW do not support VFs.
                  */
                 rc = efx_ef10_vport_set_mac_address(efx);
-       } else {
+       } else if (rc) {
                 efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC,
                                        sizeof(inbuf), NULL, 0, rc);
         }
diff --combined drivers/net/ethernet/sfc/efx.c

index 016616a6388057c7107196ae7521543785a5c678,6668e371405c9f8680fa360de03c1cee19a9004c..e3c492fcaff07d9c6b3c5211609c3bd0bde0eac4
--- 1/drivers/net/ethernet/sfc/efx.c
--- 2/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@@ -471,8 -471,7 +471,7 @@@ efx_alloc_channel(struct efx_nic *efx, 
   
         rx_queue = &channel->rx_queue;
         rx_queue->efx = efx;
-       setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
-                   (unsigned long)rx_queue);
+       timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
   
         return channel;
   }
@@@ -511,8 -510,7 +510,7 @@@ efx_copy_channel(const struct efx_chann
         rx_queue = &channel->rx_queue;
         rx_queue->buffer = NULL;
         memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
-       setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
-                   (unsigned long)rx_queue);
+       timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
   
         return channel;
   }
@@@ -2317,8 -2315,11 +2315,11 @@@ static int efx_set_features(struct net_
                         return rc;
         }
   
-       /* If Rx VLAN filter is changed, update filters via mac_reconfigure */
-       if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) {
+       /* If Rx VLAN filter is changed, update filters via mac_reconfigure.
+        * If rx-fcs is changed, mac_reconfigure updates that too.
+        */
+       if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
+                                         NETIF_F_RXFCS)) {
                 /* efx_set_rx_mode() will schedule MAC work to update filters
                  * when a new features are finally set in net_dev.
                  */
@@@ -2809,7 -2810,7 +2810,7 @@@ static void efx_reset_work(struct work_
         unsigned long pending;
         enum reset_type method;
   
- -      pending = ACCESS_ONCE(efx->reset_pending);
+ +      pending = READ_ONCE(efx->reset_pending);
         method = fls(pending) - 1;
   
         if (method == RESET_TYPE_MC_BIST)
@@@ -2874,7 -2875,7 +2875,7 @@@ void efx_schedule_reset(struct efx_nic 
         /* If we're not READY then just leave the flags set as the cue
          * to abort probing or reschedule the reset later.
          */
- -      if (ACCESS_ONCE(efx->state) != STATE_READY)
+ +      if (READ_ONCE(efx->state) != STATE_READY)
                 return;
   
         /* efx_process_channel() will no longer read events once a
@@@ -3244,7 -3245,7 +3245,7 @@@ static int efx_pci_probe_post_io(struc
   
         /* Determine netdevice features */
         net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
-                             NETIF_F_TSO | NETIF_F_RXCSUM);
+                             NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
         if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                 net_dev->features |= NETIF_F_TSO6;
         /* Check whether device supports TSO */
@@@ -3255,7 -3256,10 +3256,10 @@@
                                    NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
                                    NETIF_F_RXCSUM);
   
-       net_dev->hw_features = net_dev->features & ~efx->fixed_features;
+       net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
+ 
+       /* Disable receiving frames with bad FCS, by default. */
+       net_dev->features &= ~NETIF_F_RXALL;
   
         /* Disable VLAN filtering by default.  It may be enforced if
          * the feature is fixed (i.e. VLAN filters are required to
diff --combined drivers/net/ethernet/sfc/falcon/efx.c

index 7263275fde4a1d6eb27d7bd3d358904b2a66313a,6685a66ee1a3b85624414f65a8c8f24a0994b87d..3d6c91e96589870ca540331350ceae7563e163c5
--- 1/drivers/net/ethernet/sfc/falcon/efx.c
--- 2/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@@ -449,8 -449,7 +449,7 @@@ ef4_alloc_channel(struct ef4_nic *efx, 
   
         rx_queue = &channel->rx_queue;
         rx_queue->efx = efx;
-       setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
-                   (unsigned long)rx_queue);
+       timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0);
   
         return channel;
   }
@@@ -489,8 -488,7 +488,7 @@@ ef4_copy_channel(const struct ef4_chann
         rx_queue = &channel->rx_queue;
         rx_queue->buffer = NULL;
         memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
-       setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
-                   (unsigned long)rx_queue);
+       timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0);
   
         return channel;
   }
@@@ -2545,7 -2543,7 +2543,7 @@@ static void ef4_reset_work(struct work_
         unsigned long pending;
         enum reset_type method;
   
- -      pending = ACCESS_ONCE(efx->reset_pending);
+ +      pending = READ_ONCE(efx->reset_pending);
         method = fls(pending) - 1;
   
         if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
@@@ -2605,7 -2603,7 +2603,7 @@@ void ef4_schedule_reset(struct ef4_nic 
         /* If we're not READY then just leave the flags set as the cue
          * to abort probing or reschedule the reset later.
          */
- -      if (ACCESS_ONCE(efx->state) != STATE_READY)
+ +      if (READ_ONCE(efx->state) != STATE_READY)
                 return;
   
         queue_work(reset_workqueue, &efx->reset_work);
diff --combined drivers/net/ethernet/sfc/falcon/falcon.c

index cd8bb472d75813773e645b6bf0e1c196523a38d0,ccda017b6794525f3043655e72171b37f79ef534..6520d7bc8d211755e44d1900e1ff1dbce2d0d5d7
--- 1/drivers/net/ethernet/sfc/falcon/falcon.c
--- 2/drivers/net/ethernet/sfc/falcon/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon/falcon.c
@@@ -452,7 -452,7 +452,7 @@@ static irqreturn_t falcon_legacy_interr
                    "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n",
                    irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker));
   
- -      if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
+ +      if (!likely(READ_ONCE(efx->irq_soft_enabled)))
                 return IRQ_HANDLED;
   
         /* Check to see if we have a serious error condition */
@@@ -1372,7 -1372,7 +1372,7 @@@ static void falcon_reconfigure_mac_wrap
         ef4_oword_t reg;
         int link_speed, isolate;
   
- -      isolate = !!ACCESS_ONCE(efx->reset_pending);
+ +      isolate = !!READ_ONCE(efx->reset_pending);
   
         switch (link_state->speed) {
         case 10000: link_speed = 3; break;
@@@ -1454,10 -1454,11 +1454,11 @@@ static void falcon_stats_complete(struc
         }
   }
   
- static void falcon_stats_timer_func(unsigned long context)
+ static void falcon_stats_timer_func(struct timer_list *t)
   {
-       struct ef4_nic *efx = (struct ef4_nic *)context;
-       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct falcon_nic_data *nic_data = from_timer(nic_data, t,
+                                                     stats_timer);
+       struct ef4_nic *efx = nic_data->efx;
   
         spin_lock(&efx->stats_lock);
   
@@@ -2295,6 -2296,7 +2296,7 @@@ static int falcon_probe_nic(struct ef4_
         if (!nic_data)
                 return -ENOMEM;
         efx->nic_data = nic_data;
+       nic_data->efx = efx;
   
         rc = -ENODEV;
   
@@@ -2402,8 -2404,7 +2404,7 @@@
         }
   
         nic_data->stats_disable_count = 1;
-       setup_timer(&nic_data->stats_timer, &falcon_stats_timer_func,
-                   (unsigned long)efx);
+       timer_setup(&nic_data->stats_timer, falcon_stats_timer_func, 0);
   
         return 0;
   
diff --combined drivers/net/ethernet/sfc/falcon/nic.h

index 54ca457cdb15dc79f0d5175c83d530ed3c3440a9,e2e3c008d0738287a67bc0fc8883ba9bb194d4cb..07c62dc552cb923749edba62c9ac3399cdc04681
--- 1/drivers/net/ethernet/sfc/falcon/nic.h
--- 2/drivers/net/ethernet/sfc/falcon/nic.h
+++ b/drivers/net/ethernet/sfc/falcon/nic.h
@@@ -83,7 -83,7 +83,7 @@@ static inline struct ef4_tx_queue *ef4_
   static inline bool __ef4_nic_tx_is_empty(struct ef4_tx_queue *tx_queue,
                                          unsigned int write_count)
   {
- -      unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count);
+ +      unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count);
   
         if (empty_read_count == 0)
                 return false;
@@@ -267,6 -267,7 +267,7 @@@ enum 
   /**
    * struct falcon_nic_data - Falcon NIC state
    * @pci_dev2: Secondary function of Falcon A
+  * @efx: ef4_nic pointer
    * @board: Board state and functions
    * @stats: Hardware statistics
    * @stats_disable_count: Nest count for disabling statistics fetches
@@@ -280,6 -281,7 +281,7 @@@
    */
   struct falcon_nic_data {
         struct pci_dev *pci_dev2;
+       struct ef4_nic *efx;
         struct falcon_board board;
         u64 stats[FALCON_STAT_COUNT];
         unsigned int stats_disable_count;
@@@ -464,11 -466,11 +466,11 @@@ irqreturn_t ef4_farch_fatal_interrupt(s
   
   static inline int ef4_nic_event_test_irq_cpu(struct ef4_channel *channel)
   {
- -      return ACCESS_ONCE(channel->event_test_cpu);
+ +      return READ_ONCE(channel->event_test_cpu);
   }
   static inline int ef4_nic_irq_test_irq_cpu(struct ef4_nic *efx)
   {
- -      return ACCESS_ONCE(efx->last_irq_cpu);
+ +      return READ_ONCE(efx->last_irq_cpu);
   }
   
   /* Global Resources */
diff --combined drivers/net/ethernet/sfc/falcon/tx.c

index 6486814e97dccee08431dea134739d2840c6c991,1b978d69e702467dfdb9166157fc6d6daecdf3c8..3409bbf5b19fffbc5ec3538e592173f854eb8791
--- 1/drivers/net/ethernet/sfc/falcon/tx.c
--- 2/drivers/net/ethernet/sfc/falcon/tx.c
+++ b/drivers/net/ethernet/sfc/falcon/tx.c
@@@ -134,8 -134,8 +134,8 @@@ static void ef4_tx_maybe_stop_queue(str
          */
         netif_tx_stop_queue(txq1->core_txq);
         smp_mb();
- -      txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
- -      txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+ +      txq1->old_read_count = READ_ONCE(txq1->read_count);
+ +      txq2->old_read_count = READ_ONCE(txq2->read_count);
   
         fill_level = max(txq1->insert_count - txq1->old_read_count,
                          txq2->insert_count - txq2->old_read_count);
@@@ -435,7 -435,7 +435,7 @@@ int ef4_setup_tc(struct net_device *net
         unsigned tc, num_tc;
         int rc;
   
-       if (type != TC_SETUP_MQPRIO)
+       if (type != TC_SETUP_QDISC_MQPRIO)
                 return -EOPNOTSUPP;
   
         num_tc = mqprio->num_tc;
@@@ -524,7 -524,7 +524,7 @@@ void ef4_xmit_done(struct ef4_tx_queue 
   
         /* Check whether the hardware queue is now empty */
         if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
- -              tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
+ +              tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
                 if (tx_queue->read_count == tx_queue->old_write_count) {
                         smp_mb();
                         tx_queue->empty_read_count =
diff --combined drivers/net/ethernet/sfc/farch.c

index 86454d25a405ecbcdd3dd604b963659e0d128541,6608dfe455b17beb87eb79bfbb6438b18b22a71b..5334dc83d926024e854420f8075d1b9e0a5af04c
--- 1/drivers/net/ethernet/sfc/farch.c
--- 2/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@@ -827,7 -827,7 +827,7 @@@ efx_farch_handle_tx_event(struct efx_ch
         struct efx_nic *efx = channel->efx;
         int tx_packets = 0;
   
- -      if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+ +      if (unlikely(READ_ONCE(efx->reset_pending)))
                 return 0;
   
         if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
@@@ -927,6 -927,10 +927,10 @@@ static u16 efx_farch_handle_rx_not_ok(s
         }
   #endif
   
+       if (efx->net_dev->features & NETIF_F_RXALL)
+               /* don't discard frame for CRC error */
+               rx_ev_eth_crc_err = false;
+ 
         /* The frame must be discarded if any of these are true. */
         return (rx_ev_eth_crc_err | rx_ev_frm_trunc |
                 rx_ev_tobe_disc | rx_ev_pause_frm) ?
@@@ -979,7 -983,7 +983,7 @@@ efx_farch_handle_rx_event(struct efx_ch
         struct efx_rx_queue *rx_queue;
         struct efx_nic *efx = channel->efx;
   
- -      if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+ +      if (unlikely(READ_ONCE(efx->reset_pending)))
                 return;
   
         rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
@@@ -1520,7 -1524,7 +1524,7 @@@ irqreturn_t efx_farch_fatal_interrupt(s
   irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id)
   {
         struct efx_nic *efx = dev_id;
- -      bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
+ +      bool soft_enabled = READ_ONCE(efx->irq_soft_enabled);
         efx_oword_t *int_ker = efx->irq_status.addr;
         irqreturn_t result = IRQ_NONE;
         struct efx_channel *channel;
@@@ -1612,7 -1616,7 +1616,7 @@@ irqreturn_t efx_farch_msi_interrupt(in
                    "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
                    irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
   
- -      if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
+ +      if (!likely(READ_ONCE(efx->irq_soft_enabled)))
                 return IRQ_HANDLED;
   
         /* Handle non-event-queue sources */
diff --combined drivers/net/ethernet/sfc/ptp.c

index 56c2db398deff5f250f8cb729618daa61aa58cc7,4f54245df0ec0dd685895059e87a6d9cd1abe958..caa89bf7603e398d955ff929c94284dc66d357fb
--- 1/drivers/net/ethernet/sfc/ptp.c
--- 2/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@@ -648,17 -648,15 +648,15 @@@ static void efx_ptp_send_times(struct e
         struct pps_event_time now;
         struct timespec64 limit;
         struct efx_ptp_data *ptp = efx->ptp_data;
-       struct timespec64 start;
         int *mc_running = ptp->start.addr;
   
         pps_get_ts(&now);
-       start = now.ts_real;
         limit = now.ts_real;
         timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS);
   
         /* Write host time for specified period or until MC is done */
         while ((timespec64_compare(&now.ts_real, &limit) < 0) &&
- -             ACCESS_ONCE(*mc_running)) {
+ +             READ_ONCE(*mc_running)) {
                 struct timespec64 update_time;
                 unsigned int host_time;
   
@@@ -668,7 -666,7 +666,7 @@@
                 do {
                         pps_get_ts(&now);
                 } while ((timespec64_compare(&now.ts_real, &update_time) < 0) &&
- -                       ACCESS_ONCE(*mc_running));
+ +                       READ_ONCE(*mc_running));
   
                 /* Synchronise NIC with single word of time only */
                 host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS |
@@@ -832,14 -830,14 +830,14 @@@ static int efx_ptp_synchronize(struct e
                        ptp->start.dma_addr);
   
         /* Clear flag that signals MC ready */
- -      ACCESS_ONCE(*start) = 0;
+ +      WRITE_ONCE(*start, 0);
         rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf,
                                 MC_CMD_PTP_IN_SYNCHRONIZE_LEN);
         EFX_WARN_ON_ONCE_PARANOID(rc);
   
         /* Wait for start from MCDI (or timeout) */
         timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS);
- -      while (!ACCESS_ONCE(*start) && (time_before(jiffies, timeout))) {
+ +      while (!READ_ONCE(*start) && (time_before(jiffies, timeout))) {
                 udelay(20);     /* Usually start MCDI execution quickly */
                 loops++;
         }
@@@ -849,7 -847,7 +847,7 @@@
         if (!time_before(jiffies, timeout))
                 ++ptp->sync_timeouts;
   
- -      if (ACCESS_ONCE(*start))
+ +      if (READ_ONCE(*start))
                 efx_ptp_send_times(efx, &last_time);
   
         /* Collect results */
diff --combined drivers/net/ethernet/sfc/tx.c

index efb66ea21f27d3d8fd458bba54bdd046a1a936a2,ea27b8a7f46502e72b48d5c02024adfa8f965d9c..0ea7e16f2e6e2c6d8106308e73327390e62074ce
--- 1/drivers/net/ethernet/sfc/tx.c
--- 2/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@@ -136,8 -136,8 +136,8 @@@ static void efx_tx_maybe_stop_queue(str
          */
         netif_tx_stop_queue(txq1->core_txq);
         smp_mb();
- -      txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
- -      txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+ +      txq1->old_read_count = READ_ONCE(txq1->read_count);
+ +      txq2->old_read_count = READ_ONCE(txq2->read_count);
   
         fill_level = max(txq1->insert_count - txq1->old_read_count,
                          txq2->insert_count - txq2->old_read_count);
@@@ -663,7 -663,7 +663,7 @@@ int efx_setup_tc(struct net_device *net
         unsigned tc, num_tc;
         int rc;
   
-       if (type != TC_SETUP_MQPRIO)
+       if (type != TC_SETUP_QDISC_MQPRIO)
                 return -EOPNOTSUPP;
   
         num_tc = mqprio->num_tc;
@@@ -752,7 -752,7 +752,7 @@@ void efx_xmit_done(struct efx_tx_queue 
   
         /* Check whether the hardware queue is now empty */
         if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
- -              tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
+ +              tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
                 if (tx_queue->read_count == tx_queue->old_write_count) {
                         smp_mb();
                         tx_queue->empty_read_count =
diff --combined drivers/net/ethernet/sun/niu.c

index 8ab0fb6892d5d3562e891d574331d816acb69a51,ab502ee35fb29cbcd886355cbeeecb7d04532cc2..06001bacbe0fe8e6f648168b47aacff6c7a4d31a
--- 1/drivers/net/ethernet/sun/niu.c
--- 2/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@@ -2221,9 -2221,9 +2221,9 @@@ static int niu_link_status(struct niu *
         return err;
   }
   
- static void niu_timer(unsigned long __opaque)
+ static void niu_timer(struct timer_list *t)
   {
-       struct niu *np = (struct niu *) __opaque;
+       struct niu *np = from_timer(np, t, timer);
         unsigned long off;
         int err, link_up;
   
@@@ -6123,10 -6123,8 +6123,8 @@@ static int niu_open(struct net_device *
   
         err = niu_init_hw(np);
         if (!err) {
-               init_timer(&np->timer);
+               timer_setup(&np->timer, niu_timer, 0);
                 np->timer.expires = jiffies + HZ;
-               np->timer.data = (unsigned long) np;
-               np->timer.function = niu_timer;
   
                 err = niu_enable_interrupts(np, 1);
                 if (err)
@@@ -6245,7 -6243,7 +6243,7 @@@ static void niu_get_rx_stats(struct ni
   
         pkts = dropped = errors = bytes = 0;
   
- -      rx_rings = ACCESS_ONCE(np->rx_rings);
+ +      rx_rings = READ_ONCE(np->rx_rings);
         if (!rx_rings)
                 goto no_rings;
   
@@@ -6276,7 -6274,7 +6274,7 @@@ static void niu_get_tx_stats(struct ni
   
         pkts = errors = bytes = 0;
   
- -      tx_rings = ACCESS_ONCE(np->tx_rings);
+ +      tx_rings = READ_ONCE(np->tx_rings);
         if (!tx_rings)
                 goto no_rings;
   
@@@ -6775,10 -6773,8 +6773,8 @@@ static int niu_change_mtu(struct net_de
   
         err = niu_init_hw(np);
         if (!err) {
-               init_timer(&np->timer);
+               timer_setup(&np->timer, niu_timer, 0);
                 np->timer.expires = jiffies + HZ;
-               np->timer.data = (unsigned long) np;
-               np->timer.function = niu_timer;
   
                 err = niu_enable_interrupts(np, 1);
                 if (err)
diff --combined drivers/net/hamradio/yam.c

index 104f71fa9c5ed342df1988dc86d5389e1c49673b,b88c5cc00a6320ae3e241c8afcd569ec2d9e73e7..14c3632b8cde3cc95a25d98bc3ff826ed529205e
--- 1/drivers/net/hamradio/yam.c
--- 2/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@@ -157,7 -157,7 +157,7 @@@ static struct net_device *yam_devs[NR_P
   
   static struct yam_mcs *yam_data;
   
- -static DEFINE_TIMER(yam_timer, NULL, 0, 0);
+ +static DEFINE_TIMER(yam_timer, NULL);
   
   /* --------------------------------------------------------------------- */
   
@@@ -647,7 -647,7 +647,7 @@@ static void yam_arbitrate(struct net_de
         yam_start_tx(dev, yp);
   }
   
- static void yam_dotimer(unsigned long dummy)
+ static void yam_dotimer(struct timer_list *unused)
   {
         int i;
   
@@@ -1164,7 -1164,7 +1164,7 @@@ static int __init yam_init_driver(void
   
         }
   
-       yam_timer.function = yam_dotimer;
+       timer_setup(&yam_timer, yam_dotimer, 0);
         yam_timer.expires = jiffies + HZ / 100;
         add_timer(&yam_timer);
   
diff --combined drivers/net/tun.c

index c1685a6d788360beb3a1a0b8cf3a01efe157618d,1a326b69722189c60366531c4635ba4f2fb66c99..6bb1e604aadd68b6060df277491478899ccc7e83
--- 1/drivers/net/tun.c
--- 2/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@@ -75,6 -75,7 +75,7 @@@
   #include <linux/skb_array.h>
   #include <linux/bpf.h>
   #include <linux/bpf_trace.h>
+ #include <linux/mutex.h>
   
   #include <linux/uaccess.h>
   
@@@ -121,7 -122,8 +122,8 @@@ do {                                                               
   #define TUN_VNET_BE     0x40000000
   
   #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
-                     IFF_MULTI_QUEUE)
+                     IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
+ 
   #define GOODCOPY_LEN 128
   
   #define FLT_EXACT_COUNT 8
@@@ -172,6 -174,9 +174,9 @@@ struct tun_file 
                 u16 queue_index;
                 unsigned int ifindex;
         };
+       struct napi_struct napi;
+       bool napi_enabled;
+       struct mutex napi_mutex;        /* Protects access to the above napi */
         struct list_head next;
         struct tun_struct *detached;
         struct skb_array tx_array;
@@@ -229,6 -234,75 +234,75 @@@ struct tun_struct 
         struct bpf_prog __rcu *xdp_prog;
   };
   
+ static int tun_napi_receive(struct napi_struct *napi, int budget)
+ {
+       struct tun_file *tfile = container_of(napi, struct tun_file, napi);
+       struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+       struct sk_buff_head process_queue;
+       struct sk_buff *skb;
+       int received = 0;
+ 
+       __skb_queue_head_init(&process_queue);
+ 
+       spin_lock(&queue->lock);
+       skb_queue_splice_tail_init(queue, &process_queue);
+       spin_unlock(&queue->lock);
+ 
+       while (received < budget && (skb = __skb_dequeue(&process_queue))) {
+               napi_gro_receive(napi, skb);
+               ++received;
+       }
+ 
+       if (!skb_queue_empty(&process_queue)) {
+               spin_lock(&queue->lock);
+               skb_queue_splice(&process_queue, queue);
+               spin_unlock(&queue->lock);
+       }
+ 
+       return received;
+ }
+ 
+ static int tun_napi_poll(struct napi_struct *napi, int budget)
+ {
+       unsigned int received;
+ 
+       received = tun_napi_receive(napi, budget);
+ 
+       if (received < budget)
+               napi_complete_done(napi, received);
+ 
+       return received;
+ }
+ 
+ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
+                         bool napi_en)
+ {
+       tfile->napi_enabled = napi_en;
+       if (napi_en) {
+               netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
+                              NAPI_POLL_WEIGHT);
+               napi_enable(&tfile->napi);
+               mutex_init(&tfile->napi_mutex);
+       }
+ }
+ 
+ static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile)
+ {
+       if (tfile->napi_enabled)
+               napi_disable(&tfile->napi);
+ }
+ 
+ static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile)
+ {
+       if (tfile->napi_enabled)
+               netif_napi_del(&tfile->napi);
+ }
+ 
+ static bool tun_napi_frags_enabled(const struct tun_struct *tun)
+ {
+       return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS;
+ }
+ 
   #ifdef CONFIG_TUN_VNET_CROSS_LE
   static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
   {
@@@ -380,25 -454,28 +454,28 @@@ static void tun_flow_cleanup(unsigned l
   
         tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
   
-       spin_lock_bh(&tun->lock);
+       spin_lock(&tun->lock);
         for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
                 struct tun_flow_entry *e;
                 struct hlist_node *n;
   
                 hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
                         unsigned long this_timer;
-                       count++;
+ 
                         this_timer = e->updated + delay;
-                       if (time_before_eq(this_timer, jiffies))
+                       if (time_before_eq(this_timer, jiffies)) {
                                 tun_flow_delete(tun, e);
-                       else if (time_before(this_timer, next_timer))
+                               continue;
+                       }
+                       count++;
+                       if (time_before(this_timer, next_timer))
                                 next_timer = this_timer;
                 }
         }
   
         if (count)
                 mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
-       spin_unlock_bh(&tun->lock);
+       spin_unlock(&tun->lock);
   }
   
   static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
@@@ -469,7 -546,7 +546,7 @@@ static u16 tun_select_queue(struct net_
         u32 numqueues = 0;
   
         rcu_read_lock();
- -      numqueues = ACCESS_ONCE(tun->numqueues);
+ +      numqueues = READ_ONCE(tun->numqueues);
   
         txq = __skb_get_hash_symmetric(skb);
         if (txq) {
@@@ -541,6 -618,11 +618,11 @@@ static void __tun_detach(struct tun_fil
   
         tun = rtnl_dereference(tfile->tun);
   
+       if (tun && clean) {
+               tun_napi_disable(tun, tfile);
+               tun_napi_del(tun, tfile);
+       }
+ 
         if (tun && !tfile->detached) {
                 u16 index = tfile->queue_index;
                 BUG_ON(index >= tun->numqueues);
@@@ -598,6 -680,7 +680,7 @@@ static void tun_detach_all(struct net_d
         for (i = 0; i < n; i++) {
                 tfile = rtnl_dereference(tun->tfiles[i]);
                 BUG_ON(!tfile);
+               tun_napi_disable(tun, tfile);
                 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
                 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
                 RCU_INIT_POINTER(tfile->tun, NULL);
@@@ -613,6 -696,7 +696,7 @@@
         synchronize_net();
         for (i = 0; i < n; i++) {
                 tfile = rtnl_dereference(tun->tfiles[i]);
+               tun_napi_del(tun, tfile);
                 /* Drop read queue */
                 tun_queue_purge(tfile);
                 sock_put(&tfile->sk);
@@@ -631,7 -715,8 +715,8 @@@
                 module_put(THIS_MODULE);
   }
   
- static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
+ static int tun_attach(struct tun_struct *tun, struct file *file,
+                     bool skip_filter, bool napi)
   {
         struct tun_file *tfile = file->private_data;
         struct net_device *dev = tun->dev;
@@@ -677,10 -762,12 +762,12 @@@
         rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
         tun->numqueues++;
   
-       if (tfile->detached)
+       if (tfile->detached) {
                 tun_enable_queue(tfile);
-       else
+       } else {
                 sock_hold(&tfile->sk);
+               tun_napi_init(tun, tfile, napi);
+       }
   
         tun_set_real_num_queues(tun);
   
@@@ -692,7 -779,7 +779,7 @@@ out
         return err;
   }
   
- static struct tun_struct *__tun_get(struct tun_file *tfile)
+ static struct tun_struct *tun_get(struct tun_file *tfile)
   {
         struct tun_struct *tun;
   
@@@ -705,11 -792,6 +792,6 @@@
         return tun;
   }
   
- static struct tun_struct *tun_get(struct file *file)
- {
-       return __tun_get(file->private_data);
- }
- 
   static void tun_put(struct tun_struct *tun)
   {
         dev_put(tun->dev);
@@@ -864,7 -946,7 +946,7 @@@ static netdev_tx_t tun_net_xmit(struct 
   
         rcu_read_lock();
         tfile = rcu_dereference(tun->tfiles[txq]);
- -      numqueues = ACCESS_ONCE(tun->numqueues);
+ +      numqueues = READ_ONCE(tun->numqueues);
   
         /* Drop packet if interface is not attached */
         if (txq >= numqueues)
@@@ -956,13 -1038,33 +1038,33 @@@ static void tun_poll_controller(struct 
          * Tun only receives frames when:
          * 1) the char device endpoint gets data from user space
          * 2) the tun socket gets a sendmsg call from user space
-        * Since both of those are synchronous operations, we are guaranteed
-        * never to have pending data when we poll for it
-        * so there is nothing to do here but return.
+        * If NAPI is not enabled, since both of those are synchronous
+        * operations, we are guaranteed never to have pending data when we poll
+        * for it so there is nothing to do here but return.
          * We need this though so netpoll recognizes us as an interface that
          * supports polling, which enables bridge devices in virt setups to
          * still use netconsole
+        * If NAPI is enabled, however, we need to schedule polling for all
+        * queues unless we are using napi_gro_frags(), which we call in
+        * process context and not in NAPI context.
          */
+       struct tun_struct *tun = netdev_priv(dev);
+ 
+       if (tun->flags & IFF_NAPI) {
+               struct tun_file *tfile;
+               int i;
+ 
+               if (tun_napi_frags_enabled(tun))
+                       return;
+ 
+               rcu_read_lock();
+               for (i = 0; i < tun->numqueues; i++) {
+                       tfile = rcu_dereference(tun->tfiles[i]);
+                       if (tfile->napi_enabled)
+                               napi_schedule(&tfile->napi);
+               }
+               rcu_read_unlock();
+       }
         return;
   }
   #endif
@@@ -1039,7 -1141,7 +1141,7 @@@ static u32 tun_xdp_query(struct net_dev
         return 0;
   }
   
- static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+ static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
   {
         switch (xdp->command) {
         case XDP_SETUP_PROG:
@@@ -1083,7 -1185,7 +1185,7 @@@ static const struct net_device_ops tap_
         .ndo_features_check     = passthru_features_check,
         .ndo_set_rx_headroom    = tun_set_headroom,
         .ndo_get_stats64        = tun_net_get_stats64,
-       .ndo_xdp                = tun_xdp,
+       .ndo_bpf                = tun_xdp,
   };
   
   static void tun_flow_init(struct tun_struct *tun)
@@@ -1095,8 -1197,6 +1197,6 @@@
   
         tun->ageing_time = TUN_FLOW_EXPIRE;
         setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
-       mod_timer(&tun->flow_gc_timer,
-                 round_jiffies_up(jiffies + tun->ageing_time));
   }
   
   static void tun_flow_uninit(struct tun_struct *tun)
@@@ -1149,7 -1249,7 +1249,7 @@@ static void tun_net_init(struct net_dev
   static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
   {
         struct tun_file *tfile = file->private_data;
-       struct tun_struct *tun = __tun_get(tfile);
+       struct tun_struct *tun = tun_get(tfile);
         struct sock *sk;
         unsigned int mask = 0;
   
@@@ -1178,6 -1278,64 +1278,64 @@@
         return mask;
   }
   
+ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
+                                           size_t len,
+                                           const struct iov_iter *it)
+ {
+       struct sk_buff *skb;
+       size_t linear;
+       int err;
+       int i;
+ 
+       if (it->nr_segs > MAX_SKB_FRAGS + 1)
+               return ERR_PTR(-ENOMEM);
+ 
+       local_bh_disable();
+       skb = napi_get_frags(&tfile->napi);
+       local_bh_enable();
+       if (!skb)
+               return ERR_PTR(-ENOMEM);
+ 
+       linear = iov_iter_single_seg_count(it);
+       err = __skb_grow(skb, linear);
+       if (err)
+               goto free;
+ 
+       skb->len = len;
+       skb->data_len = len - linear;
+       skb->truesize += skb->data_len;
+ 
+       for (i = 1; i < it->nr_segs; i++) {
+               size_t fragsz = it->iov[i].iov_len;
+               unsigned long offset;
+               struct page *page;
+               void *data;
+ 
+               if (fragsz == 0 || fragsz > PAGE_SIZE) {
+                       err = -EINVAL;
+                       goto free;
+               }
+ 
+               local_bh_disable();
+               data = napi_alloc_frag(fragsz);
+               local_bh_enable();
+               if (!data) {
+                       err = -ENOMEM;
+                       goto free;
+               }
+ 
+               page = virt_to_head_page(data);
+               offset = data - page_address(page);
+               skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+       }
+ 
+       return skb;
+ free:
+       /* frees skb and all frags allocated with napi_alloc_frag() */
+       napi_free_frags(&tfile->napi);
+       return ERR_PTR(err);
+ }
+ 
   /* prepad is the amount to reserve at front.  len is length after that.
    * linear is a hint as to how much to copy (usually headers). */
   static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
@@@ -1315,6 -1473,7 +1473,7 @@@ static struct sk_buff *tun_build_skb(st
   
                 xdp.data_hard_start = buf;
                 xdp.data = buf + pad;
+               xdp_set_data_meta_invalid(&xdp);
                 xdp.data_end = xdp.data + len;
                 orig_data = xdp.data;
                 act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@@ -1391,6 -1550,7 +1550,7 @@@ static ssize_t tun_get_user(struct tun_
         int err;
         u32 rxhash;
         int skb_xdp = 1;
+       bool frags = tun_napi_frags_enabled(tun);
   
         if (!(tun->dev->flags & IFF_UP))
                 return -EIO;
@@@ -1448,7 -1608,7 +1608,7 @@@
                         zerocopy = true;
         }
   
-       if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+       if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
                 /* For the packet that is not easy to be processed
                  * (e.g gso or jumbo packet), we will do it at after
                  * skb was created with generic XDP routine.
@@@ -1469,10 -1629,24 +1629,24 @@@
                                 linear = tun16_to_cpu(tun, gso.hdr_len);
                 }
   
-               skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+               if (frags) {
+                       mutex_lock(&tfile->napi_mutex);
+                       skb = tun_napi_alloc_frags(tfile, copylen, from);
+                       /* tun_napi_alloc_frags() enforces a layout for the skb.
+                        * If zerocopy is enabled, then this layout will be
+                        * overwritten by zerocopy_sg_from_iter().
+                        */
+                       zerocopy = false;
+               } else {
+                       skb = tun_alloc_skb(tfile, align, copylen, linear,
+                                           noblock);
+               }
+ 
                 if (IS_ERR(skb)) {
                         if (PTR_ERR(skb) != -EAGAIN)
                                 this_cpu_inc(tun->pcpu_stats->rx_dropped);
+                       if (frags)
+                               mutex_unlock(&tfile->napi_mutex);
                         return PTR_ERR(skb);
                 }
   
@@@ -1484,6 -1658,11 +1658,11 @@@
                 if (err) {
                         this_cpu_inc(tun->pcpu_stats->rx_dropped);
                         kfree_skb(skb);
+                       if (frags) {
+                               tfile->napi.skb = NULL;
+                               mutex_unlock(&tfile->napi_mutex);
+                       }
+ 
                         return -EFAULT;
                 }
         }
@@@ -1491,6 -1670,11 +1670,11 @@@
         if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
                 this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
                 kfree_skb(skb);
+               if (frags) {
+                       tfile->napi.skb = NULL;
+                       mutex_unlock(&tfile->napi_mutex);
+               }
+ 
                 return -EINVAL;
         }
   
@@@ -1518,7 -1702,8 +1702,8 @@@
                 skb->dev = tun->dev;
                 break;
         case IFF_TAP:
-               skb->protocol = eth_type_trans(skb, tun->dev);
+               if (!frags)
+                       skb->protocol = eth_type_trans(skb, tun->dev);
                 break;
         }
   
@@@ -1552,11 -1737,41 +1737,41 @@@
         }
   
         rxhash = __skb_get_hash_symmetric(skb);
- #ifndef CONFIG_4KSTACKS
-       tun_rx_batched(tun, tfile, skb, more);
- #else
-       netif_rx_ni(skb);
- #endif
+ 
+       if (frags) {
+               /* Exercise flow dissector code path. */
+               u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb));
+ 
+               if (unlikely(headlen > skb_headlen(skb))) {
+                       this_cpu_inc(tun->pcpu_stats->rx_dropped);
+                       napi_free_frags(&tfile->napi);
+                       mutex_unlock(&tfile->napi_mutex);
+                       WARN_ON(1);
+                       return -ENOMEM;
+               }
+ 
+               local_bh_disable();
+               napi_gro_frags(&tfile->napi);
+               local_bh_enable();
+               mutex_unlock(&tfile->napi_mutex);
+       } else if (tfile->napi_enabled) {
+               struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+               int queue_len;
+ 
+               spin_lock_bh(&queue->lock);
+               __skb_queue_tail(queue, skb);
+               queue_len = skb_queue_len(queue);
+               spin_unlock(&queue->lock);
+ 
+               if (!more || queue_len > NAPI_POLL_WEIGHT)
+                       napi_schedule(&tfile->napi);
+ 
+               local_bh_enable();
+       } else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
+               tun_rx_batched(tun, tfile, skb, more);
+       } else {
+               netif_rx_ni(skb);
+       }
   
         stats = get_cpu_ptr(tun->pcpu_stats);
         u64_stats_update_begin(&stats->syncp);
@@@ -1572,8 -1787,8 +1787,8 @@@
   static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
   {
         struct file *file = iocb->ki_filp;
-       struct tun_struct *tun = tun_get(file);
         struct tun_file *tfile = file->private_data;
+       struct tun_struct *tun = tun_get(tfile);
         ssize_t result;
   
         if (!tun)
@@@ -1757,7 -1972,7 +1972,7 @@@ static ssize_t tun_chr_read_iter(struc
   {
         struct file *file = iocb->ki_filp;
         struct tun_file *tfile = file->private_data;
-       struct tun_struct *tun = __tun_get(tfile);
+       struct tun_struct *tun = tun_get(tfile);
         ssize_t len = iov_iter_count(to), ret;
   
         if (!tun)
@@@ -1834,7 -2049,7 +2049,7 @@@ static int tun_sendmsg(struct socket *s
   {
         int ret;
         struct tun_file *tfile = container_of(sock, struct tun_file, socket);
-       struct tun_struct *tun = __tun_get(tfile);
+       struct tun_struct *tun = tun_get(tfile);
   
         if (!tun)
                 return -EBADFD;
@@@ -1850,7 -2065,7 +2065,7 @@@ static int tun_recvmsg(struct socket *s
                        int flags)
   {
         struct tun_file *tfile = container_of(sock, struct tun_file, socket);
-       struct tun_struct *tun = __tun_get(tfile);
+       struct tun_struct *tun = tun_get(tfile);
         int ret;
   
         if (!tun)
@@@ -1882,7 -2097,7 +2097,7 @@@ static int tun_peek_len(struct socket *
         struct tun_struct *tun;
         int ret = 0;
   
-       tun = __tun_get(tfile);
+       tun = tun_get(tfile);
         if (!tun)
                 return 0;
   
@@@ -1962,6 -2177,15 +2177,15 @@@ static int tun_set_iff(struct net *net
         if (tfile->detached)
                 return -EINVAL;
   
+       if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) {
+               if (!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+ 
+               if (!(ifr->ifr_flags & IFF_NAPI) ||
+                   (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP)
+                       return -EINVAL;
+       }
+ 
         dev = __dev_get_by_name(net, ifr->ifr_name);
         if (dev) {
                 if (ifr->ifr_flags & IFF_TUN_EXCL)
@@@ -1983,7 -2207,8 +2207,8 @@@
                 if (err < 0)
                         return err;
   
-               err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER);
+               err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
+                                ifr->ifr_flags & IFF_NAPI);
                 if (err < 0)
                         return err;
   
@@@ -2072,7 -2297,7 +2297,7 @@@
                                        NETIF_F_HW_VLAN_STAG_TX);
   
                 INIT_LIST_HEAD(&tun->disabled);
-               err = tun_attach(tun, file, false);
+               err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
                 if (err < 0)
                         goto err_free_flow;
   
@@@ -2222,7 -2447,7 +2447,7 @@@ static int tun_set_queue(struct file *f
                 ret = security_tun_dev_attach_queue(tun->security);
                 if (ret < 0)
                         goto unlock;
-               ret = tun_attach(tun, file, false);
+               ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI);
         } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
                 tun = rtnl_dereference(tfile->tun);
                 if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
@@@ -2271,7 -2496,7 +2496,7 @@@ static long __tun_chr_ioctl(struct fil
         ret = 0;
         rtnl_lock();
   
-       tun = __tun_get(tfile);
+       tun = tun_get(tfile);
         if (cmd == TUNSETIFF) {
                 ret = -EEXIST;
                 if (tun)
@@@ -2622,15 -2847,16 +2847,16 @@@ static int tun_chr_close(struct inode *
   }
   
   #ifdef CONFIG_PROC_FS
- static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f)
+ static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
   {
+       struct tun_file *tfile = file->private_data;
         struct tun_struct *tun;
         struct ifreq ifr;
   
         memset(&ifr, 0, sizeof(ifr));
   
         rtnl_lock();
-       tun = tun_get(f);
+       tun = tun_get(tfile);
         if (tun)
                 tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
         rtnl_unlock();
diff --combined drivers/net/vxlan.c

index 3247d2feda07f8a671fe32aa45225cfbd34c50ff,c437707a8549ea02c38c2d4a6646470154e8760c..7ac487031b4bca89b13f6c6fa5312651e1901661
--- 1/drivers/net/vxlan.c
--- 2/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@@ -1623,26 -1623,19 +1623,19 @@@ static struct sk_buff *vxlan_na_create(
   static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
   {
         struct vxlan_dev *vxlan = netdev_priv(dev);
-       struct nd_msg *msg;
-       const struct ipv6hdr *iphdr;
         const struct in6_addr *daddr;
-       struct neighbour *n;
+       const struct ipv6hdr *iphdr;
         struct inet6_dev *in6_dev;
+       struct neighbour *n;
+       struct nd_msg *msg;
   
         in6_dev = __in6_dev_get(dev);
         if (!in6_dev)
                 goto out;
   
-       if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
-               goto out;
- 
         iphdr = ipv6_hdr(skb);
         daddr = &iphdr->daddr;
- 
         msg = (struct nd_msg *)(iphdr + 1);
-       if (msg->icmph.icmp6_code != 0 ||
-           msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
-               goto out;
   
         if (ipv6_addr_loopback(daddr) ||
             ipv6_addr_is_multicast(&msg->target))
@@@ -2240,11 -2233,11 +2233,11 @@@ tx_error
   static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
   {
         struct vxlan_dev *vxlan = netdev_priv(dev);
+       struct vxlan_rdst *rdst, *fdst = NULL;
         const struct ip_tunnel_info *info;
-       struct ethhdr *eth;
         bool did_rsc = false;
-       struct vxlan_rdst *rdst, *fdst = NULL;
         struct vxlan_fdb *f;
+       struct ethhdr *eth;
         __be32 vni = 0;
   
         info = skb_tunnel_info(skb);
@@@ -2269,12 -2262,14 +2262,14 @@@
                 if (ntohs(eth->h_proto) == ETH_P_ARP)
                         return arp_reduce(dev, skb, vni);
   #if IS_ENABLED(CONFIG_IPV6)
-               else if (ntohs(eth->h_proto) == ETH_P_IPV6) {
-                       struct ipv6hdr *hdr, _hdr;
-                       if ((hdr = skb_header_pointer(skb,
-                                                     skb_network_offset(skb),
-                                                     sizeof(_hdr), &_hdr)) &&
-                           hdr->nexthdr == IPPROTO_ICMPV6)
+               else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
+                        pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+                                           sizeof(struct nd_msg)) &&
+                        ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+                       struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1);
+ 
+                       if (m->icmph.icmp6_code == 0 &&
+                           m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
                                 return neigh_reduce(dev, skb, vni);
                 }
   #endif
@@@ -2325,9 -2320,9 +2320,9 @@@
   }
   
   /* Walk the forwarding table and purge stale entries */
- -static void vxlan_cleanup(unsigned long arg)
+ +static void vxlan_cleanup(struct timer_list *t)
   {
- -      struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
+ +      struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer);
         unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
         unsigned int h;
   
@@@ -2647,7 -2642,9 +2642,7 @@@ static void vxlan_setup(struct net_devi
         INIT_LIST_HEAD(&vxlan->next);
         spin_lock_init(&vxlan->hash_lock);
   
- -      init_timer_deferrable(&vxlan->age_timer);
- -      vxlan->age_timer.function = vxlan_cleanup;
- -      vxlan->age_timer.data = (unsigned long) vxlan;
+ +      timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
   
         vxlan->dev = dev;
   
@@@ -3702,6 -3699,7 +3697,7 @@@ static void __net_exit vxlan_exit_net(s
         struct vxlan_net *vn = net_generic(net, vxlan_net_id);
         struct vxlan_dev *vxlan, *next;
         struct net_device *dev, *aux;
+       unsigned int h;
         LIST_HEAD(list);
   
         rtnl_lock();
@@@ -3721,6 -3719,9 +3717,9 @@@
   
         unregister_netdevice_many(&list);
         rtnl_unlock();
+ 
+       for (h = 0; h < PORT_HASH_SIZE; ++h)
+               WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
   }
   
   static struct pernet_operations vxlan_net_ops = {
diff --combined drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c

index 785a0f33b7e66ece24efc4f5b9e97904a2b07957,b2256aa76eb6af2780eaf3b7bd5864262f40806f..e3495ea95553fb2d2056421d5ff0845b8f26e83a
--- 1/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
--- 2/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@@ -260,10 -260,11 +260,11 @@@ struct rte_console 
   #define I_HMB_HOST_INT        I_HMB_SW3       /* Miscellaneous Interrupt */
   
   /* tohostmailboxdata */
- #define HMB_DATA_NAKHANDLED   1       /* retransmit NAK'd frame */
- #define HMB_DATA_DEVREADY     2       /* talk to host after enable */
- #define HMB_DATA_FC           4       /* per prio flowcontrol update flag */
- #define HMB_DATA_FWREADY      8       /* fw ready for protocol activity */
+ #define HMB_DATA_NAKHANDLED   0x0001  /* retransmit NAK'd frame */
+ #define HMB_DATA_DEVREADY     0x0002  /* talk to host after enable */
+ #define HMB_DATA_FC           0x0004  /* per prio flowcontrol update flag */
+ #define HMB_DATA_FWREADY      0x0008  /* fw ready for protocol activity */
+ #define HMB_DATA_FWHALT               0x0010  /* firmware halted */
   
   #define HMB_DATA_FCDATA_MASK  0xff000000
   #define HMB_DATA_FCDATA_SHIFT 24
@@@ -1094,6 -1095,10 +1095,10 @@@ static u32 brcmf_sdio_hostmail(struct b
                           offsetof(struct sdpcmd_regs, tosbmailbox));
         bus->sdcnt.f1regdata += 2;
   
+       /* dongle indicates the firmware has halted/crashed */
+       if (hmb_data & HMB_DATA_FWHALT)
+               brcmf_err("mailbox indicates firmware halted\n");
+ 
         /* Dongle recomposed rx frames, accept them again */
         if (hmb_data & HMB_DATA_NAKHANDLED) {
                 brcmf_dbg(SDIO, "Dongle reports NAK handled, expect rtx of %d\n",
@@@ -1151,6 -1156,7 +1156,7 @@@
                          HMB_DATA_NAKHANDLED |
                          HMB_DATA_FC |
                          HMB_DATA_FWREADY |
+                        HMB_DATA_FWHALT |
                          HMB_DATA_FCDATA_MASK | HMB_DATA_VERSION_MASK))
                 brcmf_err("Unknown mailbox data content: 0x%02x\n",
                           hmb_data);
@@@ -3628,7 -3634,7 +3634,7 @@@ static void brcmf_sdio_dataworker(struc
   
         bus->dpc_running = true;
         wmb();
- -      while (ACCESS_ONCE(bus->dpc_triggered)) {
+ +      while (READ_ONCE(bus->dpc_triggered)) {
                 bus->dpc_triggered = false;
                 brcmf_sdio_dpc(bus);
                 bus->idlecount = 0;
@@@ -3979,6 -3985,24 +3985,24 @@@ brcmf_sdio_watchdog(unsigned long data
         }
   }
   
+ static int brcmf_sdio_get_fwname(struct device *dev, u32 chip, u32 chiprev,
+                                u8 *fw_name)
+ {
+       struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+       struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
+       int ret = 0;
+ 
+       if (sdiodev->fw_name[0] != '\0')
+               strlcpy(fw_name, sdiodev->fw_name, BRCMF_FW_NAME_LEN);
+       else
+               ret = brcmf_fw_map_chip_to_name(chip, chiprev,
+                                               brcmf_sdio_fwnames,
+                                               ARRAY_SIZE(brcmf_sdio_fwnames),
+                                               fw_name, NULL);
+ 
+       return ret;
+ }
+ 
   static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
         .stop = brcmf_sdio_bus_stop,
         .preinit = brcmf_sdio_bus_preinit,
@@@ -3989,6 -4013,7 +4013,7 @@@
         .wowl_config = brcmf_sdio_wowl_config,
         .get_ramsize = brcmf_sdio_bus_get_ramsize,
         .get_memdump = brcmf_sdio_bus_get_memdump,
+       .get_fwname = brcmf_sdio_get_fwname,
   };
   
   static void brcmf_sdio_firmware_callback(struct device *dev, int err,
@@@ -4144,10 -4169,8 +4169,8 @@@ struct brcmf_sdio *brcmf_sdio_probe(str
         init_waitqueue_head(&bus->dcmd_resp_wait);
   
         /* Set up the watchdog timer */
-       init_timer(&bus->timer);
-       bus->timer.data = (unsigned long)bus;
-       bus->timer.function = brcmf_sdio_watchdog;
- 
+       setup_timer(&bus->timer, brcmf_sdio_watchdog,
+                   (unsigned long)bus);
         /* Initialize watchdog thread */
         init_completion(&bus->watchdog_wait);
         bus->watchdog_tsk = kthread_run(brcmf_sdio_watchdog_thread,
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/ops.c

index 0f45f34e39d3d549b78743d4683ef715296c7918,ce718e9c63ec17d66ab6315196f0eaef1471fa47..7078b7e458be84d59e691e88bf791bce9ca9154d
--- 1/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
--- 2/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@@ -86,6 -86,7 +86,7 @@@
   #include "time-event.h"
   #include "fw-api.h"
   #include "fw/api/scan.h"
+ #include "fw/acpi.h"
   
   #define DRV_DESCRIPTION       "The new Intel(R) wireless AGN driver for Linux"
   MODULE_DESCRIPTION(DRV_DESCRIPTION);
@@@ -423,8 -424,6 +424,6 @@@ static const struct iwl_hcmd_names iwl_
    * Access is done through binary search
    */
   static const struct iwl_hcmd_names iwl_mvm_mac_conf_names[] = {
-       HCMD_NAME(LINK_QUALITY_MEASUREMENT_CMD),
-       HCMD_NAME(LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF),
         HCMD_NAME(CHANNEL_SWITCH_NOA_NOTIF),
   };
   
@@@ -490,18 -489,21 +489,21 @@@ static const struct iwl_hcmd_arr iwl_mv
   static void iwl_mvm_async_handlers_wk(struct work_struct *wk);
   static void iwl_mvm_d0i3_exit_work(struct work_struct *wk);
   
- static u32 calc_min_backoff(struct iwl_trans *trans, const struct iwl_cfg *cfg)
+ static u32 iwl_mvm_min_backoff(struct iwl_mvm *mvm)
   {
-       const struct iwl_pwr_tx_backoff *pwr_tx_backoff = cfg->pwr_tx_backoffs;
+       const struct iwl_pwr_tx_backoff *backoff = mvm->cfg->pwr_tx_backoffs;
+       u64 dflt_pwr_limit;
   
-       if (!pwr_tx_backoff)
+       if (!backoff)
                 return 0;
   
-       while (pwr_tx_backoff->pwr) {
-               if (trans->dflt_pwr_limit >= pwr_tx_backoff->pwr)
-                       return pwr_tx_backoff->backoff;
+       dflt_pwr_limit = iwl_acpi_get_pwr_limit(mvm->dev);
   
-               pwr_tx_backoff++;
+       while (backoff->pwr) {
+               if (dflt_pwr_limit >= backoff->pwr)
+                       return backoff->backoff;
+ 
+               backoff++;
         }
   
         return 0;
@@@ -701,7 -703,6 +703,6 @@@ iwl_op_mode_mvm_start(struct iwl_trans 
         trans_cfg.cb_data_offs = offsetof(struct ieee80211_tx_info,
                                           driver_data[2]);
   
-       trans_cfg.sdio_adma_addr = fw->sdio_adma_addr;
         trans_cfg.sw_csum_tx = IWL_MVM_SW_TX_CSUM_OFFLOAD;
   
         /* Set a short watchdog for the command queue */
@@@ -771,7 -772,7 +772,7 @@@
                 goto out_free;
         mvm->hw_registered = true;
   
-       min_backoff = calc_min_backoff(trans, cfg);
+       min_backoff = iwl_mvm_min_backoff(mvm);
         iwl_mvm_thermal_initialize(mvm, min_backoff);
   
         err = iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
@@@ -1118,7 -1119,7 +1119,7 @@@ void iwl_mvm_set_hw_ctkill_state(struc
   static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state)
   {
         struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
- -      bool calibrating = ACCESS_ONCE(mvm->calibrating);
+ +      bool calibrating = READ_ONCE(mvm->calibrating);
   
         if (state)
                 set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/tx.c

index 6e9d3289b9d0ef68ccb40603f8601b3552abeab9,d88c3685a6ddc61e13cfa14c58d66919abed4f55..593b7f97b29c103f8faf28dd905dd36aed34e763
--- 1/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
--- 2/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@@ -652,7 -652,7 +652,7 @@@ int iwl_mvm_tx_skb_non_sta(struct iwl_m
                                 return -1;
                 } else if (info.control.vif->type == NL80211_IFTYPE_STATION &&
                            is_multicast_ether_addr(hdr->addr1)) {
- -                      u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id);
+ +                      u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id);
   
                         if (ap_sta_id != IWL_MVM_INVALID_STA)
                                 sta_id = ap_sta_id;
@@@ -700,7 -700,7 +700,7 @@@ static int iwl_mvm_tx_tso(struct iwl_mv
         snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) +
                 tcp_hdrlen(skb);
   
- -      dbg_max_amsdu_len = ACCESS_ONCE(mvm->max_amsdu_len);
+ +      dbg_max_amsdu_len = READ_ONCE(mvm->max_amsdu_len);
   
         if (!sta->max_amsdu_len ||
             !ieee80211_is_data_qos(hdr->frame_control) ||
@@@ -1594,8 -1594,7 +1594,7 @@@ static void iwl_mvm_rx_tx_cmd_agg(struc
                 mvmsta->tid_data[tid].tx_time =
                         le16_to_cpu(tx_resp->wireless_media_time);
                 mvmsta->tid_data[tid].lq_color =
-                       (tx_resp->tlc_info & TX_RES_RATE_TABLE_COLOR_MSK) >>
-                       TX_RES_RATE_TABLE_COLOR_POS;
+                       TX_RES_RATE_TABLE_COL_GET(tx_resp->tlc_info);
         }
   
         rcu_read_unlock();
@@@ -1746,6 -1745,7 +1745,7 @@@ void iwl_mvm_rx_ba_notif(struct iwl_mv
         if (iwl_mvm_has_new_tx_api(mvm)) {
                 struct iwl_mvm_compressed_ba_notif *ba_res =
                         (void *)pkt->data;
+               u8 lq_color = TX_RES_RATE_TABLE_COL_GET(ba_res->tlc_rate_info);
                 int i;
   
                 sta_id = ba_res->sta_id;
@@@ -1759,11 -1759,18 +1759,18 @@@
                 if (!le16_to_cpu(ba_res->tfd_cnt))
                         goto out;
   
+               rcu_read_lock();
+ 
+               mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id);
+               if (!mvmsta)
+                       goto out_unlock;
+ 
                 /* Free per TID */
                 for (i = 0; i < le16_to_cpu(ba_res->tfd_cnt); i++) {
                         struct iwl_mvm_compressed_ba_tfd *ba_tfd =
                                 &ba_res->tfd[i];
   
+                       mvmsta->tid_data[i].lq_color = lq_color;
                         iwl_mvm_tx_reclaim(mvm, sta_id, ba_tfd->tid,
                                            (int)(le16_to_cpu(ba_tfd->q_num)),
                                            le16_to_cpu(ba_tfd->tfd_index),
@@@ -1771,6 -1778,8 +1778,8 @@@
                                            le32_to_cpu(ba_res->tx_rate));
                 }
   
+ out_unlock:
+               rcu_read_unlock();
   out:
                 IWL_DEBUG_TX_REPLY(mvm,
                                    "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n",
diff --combined drivers/net/wireless/intel/iwlwifi/pcie/trans.c

index 9ad3f4fe589417ed752e397554575fdc4553fb0e,8d992d5ba0644935f31497d6c4aa92af1f852682..b7a51603465b20752616639cd3f663a6844dcdd8
--- 1/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
--- 2/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@@ -88,6 -88,93 +88,93 @@@
   #define IWL_FW_MEM_EXTENDED_START     0x40000
   #define IWL_FW_MEM_EXTENDED_END               0x57FFF
   
+ static void iwl_trans_pcie_dump_regs(struct iwl_trans *trans)
+ {
+ #define PCI_DUMP_SIZE 64
+ #define PREFIX_LEN    32
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct pci_dev *pdev = trans_pcie->pci_dev;
+       u32 i, pos, alloc_size, *ptr, *buf;
+       char *prefix;
+ 
+       if (trans_pcie->pcie_dbg_dumped_once)
+               return;
+ 
+       /* Should be a multiple of 4 */
+       BUILD_BUG_ON(PCI_DUMP_SIZE > 4096 || PCI_DUMP_SIZE & 0x3);
+       /* Alloc a max size buffer */
+       if (PCI_ERR_ROOT_ERR_SRC +  4 > PCI_DUMP_SIZE)
+               alloc_size = PCI_ERR_ROOT_ERR_SRC +  4 + PREFIX_LEN;
+       else
+               alloc_size = PCI_DUMP_SIZE + PREFIX_LEN;
+       buf = kmalloc(alloc_size, GFP_ATOMIC);
+       if (!buf)
+               return;
+       prefix = (char *)buf + alloc_size - PREFIX_LEN;
+ 
+       IWL_ERR(trans, "iwlwifi transaction failed, dumping registers\n");
+ 
+       /* Print wifi device registers */
+       sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+       IWL_ERR(trans, "iwlwifi device config registers:\n");
+       for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+               if (pci_read_config_dword(pdev, i, ptr))
+                       goto err_read;
+       print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+ 
+       IWL_ERR(trans, "iwlwifi device memory mapped registers:\n");
+       for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+               *ptr = iwl_read32(trans, i);
+       print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+ 
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+       if (pos) {
+               IWL_ERR(trans, "iwlwifi device AER capability structure:\n");
+               for (i = 0, ptr = buf; i < PCI_ERR_ROOT_COMMAND; i += 4, ptr++)
+                       if (pci_read_config_dword(pdev, pos + i, ptr))
+                               goto err_read;
+               print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
+                              32, 4, buf, i, 0);
+       }
+ 
+       /* Print parent device registers next */
+       if (!pdev->bus->self)
+               goto out;
+ 
+       pdev = pdev->bus->self;
+       sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+ 
+       IWL_ERR(trans, "iwlwifi parent port (%s) config registers:\n",
+               pci_name(pdev));
+       for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+               if (pci_read_config_dword(pdev, i, ptr))
+                       goto err_read;
+       print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+ 
+       /* Print root port AER registers */
+       pos = 0;
+       pdev = pcie_find_root_port(pdev);
+       if (pdev)
+               pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+       if (pos) {
+               IWL_ERR(trans, "iwlwifi root port (%s) AER cap structure:\n",
+                       pci_name(pdev));
+               sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+               for (i = 0, ptr = buf; i <= PCI_ERR_ROOT_ERR_SRC; i += 4, ptr++)
+                       if (pci_read_config_dword(pdev, pos + i, ptr))
+                               goto err_read;
+               print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32,
+                              4, buf, i, 0);
+       }
+ 
+ err_read:
+       print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+       IWL_ERR(trans, "Read failed at 0x%X\n", i);
+ out:
+       trans_pcie->pcie_dbg_dumped_once = 1;
+       kfree(buf);
+ }
+ 
   static void iwl_pcie_free_fw_monitor(struct iwl_trans *trans)
   {
         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@@ -649,6 -736,7 +736,7 @@@ static int iwl_pcie_load_firmware_chunk
                                  trans_pcie->ucode_write_complete, 5 * HZ);
         if (!ret) {
                 IWL_ERR(trans, "Failed to load firmware chunk!\n");
+               iwl_trans_pcie_dump_regs(trans);
                 return -ETIMEDOUT;
         }
   
@@@ -1868,6 -1956,7 +1956,7 @@@ static bool iwl_trans_pcie_grab_nic_acc
                            (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
                             CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
         if (unlikely(ret < 0)) {
+               iwl_trans_pcie_dump_regs(trans);
                 iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);
                 WARN_ONCE(1,
                           "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
@@@ -2076,12 -2165,12 +2165,12 @@@ static int iwl_trans_pcie_wait_txq_empt
   
         IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", txq_idx);
         txq = trans_pcie->txq[txq_idx];
- -      wr_ptr = ACCESS_ONCE(txq->write_ptr);
+ +      wr_ptr = READ_ONCE(txq->write_ptr);
   
- -      while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) &&
+ +      while (txq->read_ptr != READ_ONCE(txq->write_ptr) &&
                !time_after(jiffies,
                            now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) {
- -              u8 write_ptr = ACCESS_ONCE(txq->write_ptr);
+ +              u8 write_ptr = READ_ONCE(txq->write_ptr);
   
                 if (WARN_ONCE(wr_ptr != write_ptr,
                               "WR pointer moved while flushing %d -> %d\n",
@@@ -2553,7 -2642,7 +2642,7 @@@ static u32 iwl_trans_pcie_dump_rbs(stru
   
         spin_lock(&rxq->lock);
   
- -      r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF;
+ +      r = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF;
   
         for (i = rxq->read, j = 0;
              i != r && j < allocated_rb_nums;
@@@ -2814,7 -2903,7 +2903,7 @@@ static struct iwl_trans_dump_dat
                 /* Dump RBs is supported only for pre-9000 devices (1 queue) */
                 struct iwl_rxq *rxq = &trans_pcie->rxq[0];
                 /* RBs */
- -              num_rbs = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num))
+ +              num_rbs = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num))
                                       & 0x0FFF;
                 num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK;
                 len += num_rbs * (sizeof(*data) +
@@@ -2932,6 -3021,7 +3021,7 @@@ static void iwl_trans_pcie_resume(struc
         .ref = iwl_trans_pcie_ref,                                      \
         .unref = iwl_trans_pcie_unref,                                  \
         .dump_data = iwl_trans_pcie_dump_data,                          \
+       .dump_regs = iwl_trans_pcie_dump_regs,                          \
         .d3_suspend = iwl_trans_pcie_d3_suspend,                        \
         .d3_resume = iwl_trans_pcie_d3_resume
   
diff --combined drivers/net/wireless/mac80211_hwsim.c

index d2b3d6177a556c39530a88666afebf01d996507a,ec2f4c31425a13d633a5a532fa1c51e2918505b1..07a49f58070aa061b763c8723bab99d335a9fab7
--- 1/drivers/net/wireless/mac80211_hwsim.c
--- 2/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@@ -396,7 -396,7 +396,7 @@@ static int mac80211_hwsim_vendor_cmd_te
         if (!tb[QCA_WLAN_VENDOR_ATTR_TEST])
                 return -EINVAL;
         val = nla_get_u32(tb[QCA_WLAN_VENDOR_ATTR_TEST]);
-       wiphy_debug(wiphy, "%s: test=%u\n", __func__, val);
+       wiphy_dbg(wiphy, "%s: test=%u\n", __func__, val);
   
         /* Send a vendor event as a test. Note that this would not normally be
          * done within a command handler, but rather, based on some other
@@@ -643,9 -643,9 +643,9 @@@ static void hwsim_send_ps_poll(void *da
         if (!vp->assoc)
                 return;
   
-       wiphy_debug(data->hw->wiphy,
-                   "%s: send PS-Poll to %pM for aid %d\n",
-                   __func__, vp->bssid, vp->aid);
+       wiphy_dbg(data->hw->wiphy,
+                 "%s: send PS-Poll to %pM for aid %d\n",
+                 __func__, vp->bssid, vp->aid);
   
         skb = dev_alloc_skb(sizeof(*pspoll));
         if (!skb)
@@@ -674,9 -674,9 +674,9 @@@ static void hwsim_send_nullfunc(struct 
         if (!vp->assoc)
                 return;
   
-       wiphy_debug(data->hw->wiphy,
-                   "%s: send data::nullfunc to %pM ps=%d\n",
-                   __func__, vp->bssid, ps);
+       wiphy_dbg(data->hw->wiphy,
+                 "%s: send data::nullfunc to %pM ps=%d\n",
+                 __func__, vp->bssid, ps);
   
         skb = dev_alloc_skb(sizeof(*hdr));
         if (!skb)
@@@ -1034,7 -1034,7 +1034,7 @@@ static void mac80211_hwsim_tx_frame_nl(
         msg_head = genlmsg_put(skb, 0, 0, &hwsim_genl_family, 0,
                                HWSIM_CMD_FRAME);
         if (msg_head == NULL) {
-               printk(KERN_DEBUG "mac80211_hwsim: problem with msg_head\n");
+               pr_debug("mac80211_hwsim: problem with msg_head\n");
                 goto nla_put_failure;
         }
   
@@@ -1093,7 -1093,7 +1093,7 @@@
   nla_put_failure:
         nlmsg_free(skb);
   err_free_txskb:
-       printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+       pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
         ieee80211_free_txskb(hw, my_skb);
         data->tx_failed++;
   }
@@@ -1347,7 -1347,7 +1347,7 @@@ static void mac80211_hwsim_tx(struct ie
         }
   
         if (data->idle && !data->tmp_chan) {
-               wiphy_debug(hw->wiphy, "Trying to TX when idle - reject\n");
+               wiphy_dbg(hw->wiphy, "Trying to TX when idle - reject\n");
                 ieee80211_free_txskb(hw, skb);
                 return;
         }
@@@ -1380,7 -1380,7 +1380,7 @@@
         mac80211_hwsim_monitor_rx(hw, skb, channel);
   
         /* wmediumd mode check */
- -      _portid = ACCESS_ONCE(data->wmediumd);
+ +      _portid = READ_ONCE(data->wmediumd);
   
         if (_portid)
                 return mac80211_hwsim_tx_frame_nl(hw, skb, _portid);
@@@ -1408,7 -1408,7 +1408,7 @@@
   static int mac80211_hwsim_start(struct ieee80211_hw *hw)
   {
         struct mac80211_hwsim_data *data = hw->priv;
-       wiphy_debug(hw->wiphy, "%s\n", __func__);
+       wiphy_dbg(hw->wiphy, "%s\n", __func__);
         data->started = true;
         return 0;
   }
@@@ -1419,16 -1419,16 +1419,16 @@@ static void mac80211_hwsim_stop(struct 
         struct mac80211_hwsim_data *data = hw->priv;
         data->started = false;
         tasklet_hrtimer_cancel(&data->beacon_timer);
-       wiphy_debug(hw->wiphy, "%s\n", __func__);
+       wiphy_dbg(hw->wiphy, "%s\n", __func__);
   }
   
   
   static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw,
                                         struct ieee80211_vif *vif)
   {
-       wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
-                   __func__, ieee80211_vif_type_p2p(vif),
-                   vif->addr);
+       wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
+                 __func__, ieee80211_vif_type_p2p(vif),
+                 vif->addr);
         hwsim_set_magic(vif);
   
         vif->cab_queue = 0;
@@@ -1447,9 -1447,9 +1447,9 @@@ static int mac80211_hwsim_change_interf
                                            bool newp2p)
   {
         newtype = ieee80211_iftype_p2p(newtype, newp2p);
-       wiphy_debug(hw->wiphy,
-                   "%s (old type=%d, new type=%d, mac_addr=%pM)\n",
-                   __func__, ieee80211_vif_type_p2p(vif),
+       wiphy_dbg(hw->wiphy,
+                 "%s (old type=%d, new type=%d, mac_addr=%pM)\n",
+                 __func__, ieee80211_vif_type_p2p(vif),
                     newtype, vif->addr);
         hwsim_check_magic(vif);
   
@@@ -1465,9 -1465,9 +1465,9 @@@
   static void mac80211_hwsim_remove_interface(
         struct ieee80211_hw *hw, struct ieee80211_vif *vif)
   {
-       wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
-                   __func__, ieee80211_vif_type_p2p(vif),
-                   vif->addr);
+       wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
+                 __func__, ieee80211_vif_type_p2p(vif),
+                 vif->addr);
         hwsim_check_magic(vif);
         hwsim_clear_magic(vif);
   }
@@@ -1477,7 -1477,7 +1477,7 @@@ static void mac80211_hwsim_tx_frame(str
                                     struct ieee80211_channel *chan)
   {
         struct mac80211_hwsim_data *data = hw->priv;
- -      u32 _pid = ACCESS_ONCE(data->wmediumd);
+ +      u32 _pid = READ_ONCE(data->wmediumd);
   
         if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) {
                 struct ieee80211_tx_info *txi = IEEE80211_SKB_CB(skb);
@@@ -1589,23 -1589,23 +1589,23 @@@ static int mac80211_hwsim_config(struc
         int idx;
   
         if (conf->chandef.chan)
-               wiphy_debug(hw->wiphy,
-                           "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n",
-                           __func__,
-                           conf->chandef.chan->center_freq,
-                           conf->chandef.center_freq1,
-                           conf->chandef.center_freq2,
-                           hwsim_chanwidths[conf->chandef.width],
-                           !!(conf->flags & IEEE80211_CONF_IDLE),
-                           !!(conf->flags & IEEE80211_CONF_PS),
-                           smps_modes[conf->smps_mode]);
+               wiphy_dbg(hw->wiphy,
+                         "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n",
+                         __func__,
+                         conf->chandef.chan->center_freq,
+                         conf->chandef.center_freq1,
+                         conf->chandef.center_freq2,
+                         hwsim_chanwidths[conf->chandef.width],
+                         !!(conf->flags & IEEE80211_CONF_IDLE),
+                         !!(conf->flags & IEEE80211_CONF_PS),
+                         smps_modes[conf->smps_mode]);
         else
-               wiphy_debug(hw->wiphy,
-                           "%s (freq=0 idle=%d ps=%d smps=%s)\n",
-                           __func__,
-                           !!(conf->flags & IEEE80211_CONF_IDLE),
-                           !!(conf->flags & IEEE80211_CONF_PS),
-                           smps_modes[conf->smps_mode]);
+               wiphy_dbg(hw->wiphy,
+                         "%s (freq=0 idle=%d ps=%d smps=%s)\n",
+                         __func__,
+                         !!(conf->flags & IEEE80211_CONF_IDLE),
+                         !!(conf->flags & IEEE80211_CONF_PS),
+                         smps_modes[conf->smps_mode]);
   
         data->idle = !!(conf->flags & IEEE80211_CONF_IDLE);
   
@@@ -1659,7 -1659,7 +1659,7 @@@ static void mac80211_hwsim_configure_fi
   {
         struct mac80211_hwsim_data *data = hw->priv;
   
-       wiphy_debug(hw->wiphy, "%s\n", __func__);
+       wiphy_dbg(hw->wiphy, "%s\n", __func__);
   
         data->rx_filter = 0;
         if (*total_flags & FIF_ALLMULTI)
@@@ -1688,25 -1688,25 +1688,25 @@@ static void mac80211_hwsim_bss_info_cha
   
         hwsim_check_magic(vif);
   
-       wiphy_debug(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n",
-                   __func__, changed, vif->addr);
+       wiphy_dbg(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n",
+                 __func__, changed, vif->addr);
   
         if (changed & BSS_CHANGED_BSSID) {
-               wiphy_debug(hw->wiphy, "%s: BSSID changed: %pM\n",
-                           __func__, info->bssid);
+               wiphy_dbg(hw->wiphy, "%s: BSSID changed: %pM\n",
+                         __func__, info->bssid);
                 memcpy(vp->bssid, info->bssid, ETH_ALEN);
         }
   
         if (changed & BSS_CHANGED_ASSOC) {
-               wiphy_debug(hw->wiphy, "  ASSOC: assoc=%d aid=%d\n",
-                           info->assoc, info->aid);
+               wiphy_dbg(hw->wiphy, "  ASSOC: assoc=%d aid=%d\n",
+                         info->assoc, info->aid);
                 vp->assoc = info->assoc;
                 vp->aid = info->aid;
         }
   
         if (changed & BSS_CHANGED_BEACON_ENABLED) {
-               wiphy_debug(hw->wiphy, "  BCN EN: %d (BI=%u)\n",
-                           info->enable_beacon, info->beacon_int);
+               wiphy_dbg(hw->wiphy, "  BCN EN: %d (BI=%u)\n",
+                         info->enable_beacon, info->beacon_int);
                 vp->bcn_en = info->enable_beacon;
                 if (data->started &&
                     !hrtimer_is_queued(&data->beacon_timer.timer) &&
@@@ -1725,8 -1725,8 +1725,8 @@@
                         ieee80211_iterate_active_interfaces_atomic(
                                 data->hw, IEEE80211_IFACE_ITER_NORMAL,
                                 mac80211_hwsim_bcn_en_iter, &count);
-                       wiphy_debug(hw->wiphy, "  beaconing vifs remaining: %u",
-                                   count);
+                       wiphy_dbg(hw->wiphy, "  beaconing vifs remaining: %u",
+                                 count);
                         if (count == 0) {
                                 tasklet_hrtimer_cancel(&data->beacon_timer);
                                 data->beacon_int = 0;
@@@ -1735,31 -1735,31 +1735,31 @@@
         }
   
         if (changed & BSS_CHANGED_ERP_CTS_PROT) {
-               wiphy_debug(hw->wiphy, "  ERP_CTS_PROT: %d\n",
-                           info->use_cts_prot);
+               wiphy_dbg(hw->wiphy, "  ERP_CTS_PROT: %d\n",
+                         info->use_cts_prot);
         }
   
         if (changed & BSS_CHANGED_ERP_PREAMBLE) {
-               wiphy_debug(hw->wiphy, "  ERP_PREAMBLE: %d\n",
-                           info->use_short_preamble);
+               wiphy_dbg(hw->wiphy, "  ERP_PREAMBLE: %d\n",
+                         info->use_short_preamble);
         }
   
         if (changed & BSS_CHANGED_ERP_SLOT) {
-               wiphy_debug(hw->wiphy, "  ERP_SLOT: %d\n", info->use_short_slot);
+               wiphy_dbg(hw->wiphy, "  ERP_SLOT: %d\n", info->use_short_slot);
         }
   
         if (changed & BSS_CHANGED_HT) {
-               wiphy_debug(hw->wiphy, "  HT: op_mode=0x%x\n",
-                           info->ht_operation_mode);
+               wiphy_dbg(hw->wiphy, "  HT: op_mode=0x%x\n",
+                         info->ht_operation_mode);
         }
   
         if (changed & BSS_CHANGED_BASIC_RATES) {
-               wiphy_debug(hw->wiphy, "  BASIC_RATES: 0x%llx\n",
-                           (unsigned long long) info->basic_rates);
+               wiphy_dbg(hw->wiphy, "  BASIC_RATES: 0x%llx\n",
+                         (unsigned long long) info->basic_rates);
         }
   
         if (changed & BSS_CHANGED_TXPOWER)
-               wiphy_debug(hw->wiphy, "  TX Power: %d dBm\n", info->txpower);
+               wiphy_dbg(hw->wiphy, "  TX Power: %d dBm\n", info->txpower);
   }
   
   static int mac80211_hwsim_sta_add(struct ieee80211_hw *hw,
@@@ -1813,11 -1813,11 +1813,11 @@@ static int mac80211_hwsim_conf_tx
         struct ieee80211_vif *vif, u16 queue,
         const struct ieee80211_tx_queue_params *params)
   {
-       wiphy_debug(hw->wiphy,
-                   "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n",
-                   __func__, queue,
-                   params->txop, params->cw_min,
-                   params->cw_max, params->aifs);
+       wiphy_dbg(hw->wiphy,
+                 "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n",
+                 __func__, queue,
+                 params->txop, params->cw_min,
+                 params->cw_max, params->aifs);
         return 0;
   }
   
@@@ -1981,7 -1981,7 +1981,7 @@@ static void hw_scan_work(struct work_st
                         .aborted = false,
                 };
   
-               wiphy_debug(hwsim->hw->wiphy, "hw scan complete\n");
+               wiphy_dbg(hwsim->hw->wiphy, "hw scan complete\n");
                 ieee80211_scan_completed(hwsim->hw, &info);
                 hwsim->hw_scan_request = NULL;
                 hwsim->hw_scan_vif = NULL;
@@@ -1990,8 -1990,8 +1990,8 @@@
                 return;
         }
   
-       wiphy_debug(hwsim->hw->wiphy, "hw scan %d MHz\n",
-                   req->channels[hwsim->scan_chan_idx]->center_freq);
+       wiphy_dbg(hwsim->hw->wiphy, "hw scan %d MHz\n",
+                 req->channels[hwsim->scan_chan_idx]->center_freq);
   
         hwsim->tmp_chan = req->channels[hwsim->scan_chan_idx];
         if (hwsim->tmp_chan->flags & (IEEE80211_CHAN_NO_IR |
@@@ -2060,7 -2060,7 +2060,7 @@@ static int mac80211_hwsim_hw_scan(struc
         memset(hwsim->survey_data, 0, sizeof(hwsim->survey_data));
         mutex_unlock(&hwsim->mutex);
   
-       wiphy_debug(hw->wiphy, "hwsim hw_scan request\n");
+       wiphy_dbg(hw->wiphy, "hwsim hw_scan request\n");
   
         ieee80211_queue_delayed_work(hwsim->hw, &hwsim->hw_scan, 0);
   
@@@ -2075,7 -2075,7 +2075,7 @@@ static void mac80211_hwsim_cancel_hw_sc
                 .aborted = true,
         };
   
-       wiphy_debug(hw->wiphy, "hwsim cancel_hw_scan\n");
+       wiphy_dbg(hw->wiphy, "hwsim cancel_hw_scan\n");
   
         cancel_delayed_work_sync(&hwsim->hw_scan);
   
@@@ -2096,11 -2096,11 +2096,11 @@@ static void mac80211_hwsim_sw_scan(stru
         mutex_lock(&hwsim->mutex);
   
         if (hwsim->scanning) {
-               printk(KERN_DEBUG "two hwsim sw_scans detected!\n");
+               pr_debug("two hwsim sw_scans detected!\n");
                 goto out;
         }
   
-       printk(KERN_DEBUG "hwsim sw_scan request, prepping stuff\n");
+       pr_debug("hwsim sw_scan request, prepping stuff\n");
   
         memcpy(hwsim->scan_addr, mac_addr, ETH_ALEN);
         hwsim->scanning = true;
@@@ -2117,7 -2117,7 +2117,7 @@@ static void mac80211_hwsim_sw_scan_comp
   
         mutex_lock(&hwsim->mutex);
   
-       printk(KERN_DEBUG "hwsim sw_scan_complete\n");
+       pr_debug("hwsim sw_scan_complete\n");
         hwsim->scanning = false;
         eth_zero_addr(hwsim->scan_addr);
   
@@@ -2131,7 -2131,7 +2131,7 @@@ static void hw_roc_start(struct work_st
   
         mutex_lock(&hwsim->mutex);
   
-       wiphy_debug(hwsim->hw->wiphy, "hwsim ROC begins\n");
+       wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC begins\n");
         hwsim->tmp_chan = hwsim->roc_chan;
         ieee80211_ready_on_channel(hwsim->hw);
   
@@@ -2151,7 -2151,7 +2151,7 @@@ static void hw_roc_done(struct work_str
         hwsim->tmp_chan = NULL;
         mutex_unlock(&hwsim->mutex);
   
-       wiphy_debug(hwsim->hw->wiphy, "hwsim ROC expired\n");
+       wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC expired\n");
   }
   
   static int mac80211_hwsim_roc(struct ieee80211_hw *hw,
@@@ -2172,8 -2172,8 +2172,8 @@@
         hwsim->roc_duration = duration;
         mutex_unlock(&hwsim->mutex);
   
-       wiphy_debug(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n",
-                   chan->center_freq, duration);
+       wiphy_dbg(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n",
+                 chan->center_freq, duration);
         ieee80211_queue_delayed_work(hw, &hwsim->roc_start, HZ/50);
   
         return 0;
@@@ -2190,7 -2190,7 +2190,7 @@@ static int mac80211_hwsim_croc(struct i
         hwsim->tmp_chan = NULL;
         mutex_unlock(&hwsim->mutex);
   
-       wiphy_debug(hw->wiphy, "hwsim ROC canceled\n");
+       wiphy_dbg(hw->wiphy, "hwsim ROC canceled\n");
   
         return 0;
   }
@@@ -2199,20 -2199,20 +2199,20 @@@ static int mac80211_hwsim_add_chanctx(s
                                       struct ieee80211_chanctx_conf *ctx)
   {
         hwsim_set_chanctx_magic(ctx);
-       wiphy_debug(hw->wiphy,
-                   "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
-                   ctx->def.chan->center_freq, ctx->def.width,
-                   ctx->def.center_freq1, ctx->def.center_freq2);
+       wiphy_dbg(hw->wiphy,
+                 "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+                 ctx->def.chan->center_freq, ctx->def.width,
+                 ctx->def.center_freq1, ctx->def.center_freq2);
         return 0;
   }
   
   static void mac80211_hwsim_remove_chanctx(struct ieee80211_hw *hw,
                                           struct ieee80211_chanctx_conf *ctx)
   {
-       wiphy_debug(hw->wiphy,
-                   "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
-                   ctx->def.chan->center_freq, ctx->def.width,
-                   ctx->def.center_freq1, ctx->def.center_freq2);
+       wiphy_dbg(hw->wiphy,
+                 "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+                 ctx->def.chan->center_freq, ctx->def.width,
+                 ctx->def.center_freq1, ctx->def.center_freq2);
         hwsim_check_chanctx_magic(ctx);
         hwsim_clear_chanctx_magic(ctx);
   }
@@@ -2222,10 -2222,10 +2222,10 @@@ static void mac80211_hwsim_change_chanc
                                           u32 changed)
   {
         hwsim_check_chanctx_magic(ctx);
-       wiphy_debug(hw->wiphy,
-                   "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
-                   ctx->def.chan->center_freq, ctx->def.width,
-                   ctx->def.center_freq1, ctx->def.center_freq2);
+       wiphy_dbg(hw->wiphy,
+                 "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+                 ctx->def.chan->center_freq, ctx->def.width,
+                 ctx->def.center_freq1, ctx->def.center_freq2);
   }
   
   static int mac80211_hwsim_assign_vif_chanctx(struct ieee80211_hw *hw,
@@@ -2479,7 -2479,7 +2479,7 @@@ static int mac80211_hwsim_new_radio(str
                 ops = &mac80211_hwsim_mchan_ops;
         hw = ieee80211_alloc_hw_nm(sizeof(*data), ops, param->hwname);
         if (!hw) {
-               printk(KERN_DEBUG "mac80211_hwsim: ieee80211_alloc_hw failed\n");
+               pr_debug("mac80211_hwsim: ieee80211_alloc_hw failed\n");
                 err = -ENOMEM;
                 goto failed;
         }
@@@ -2507,7 -2507,7 +2507,7 @@@
         data->dev->driver = &mac80211_hwsim_driver.driver;
         err = device_bind_driver(data->dev);
         if (err != 0) {
-               printk(KERN_DEBUG "mac80211_hwsim: device_bind_driver failed (%d)\n",
+               pr_debug("mac80211_hwsim: device_bind_driver failed (%d)\n",
                        err);
                 goto failed_bind;
         }
@@@ -2698,12 -2698,12 +2698,12 @@@
   
         err = ieee80211_register_hw(hw);
         if (err < 0) {
-               printk(KERN_DEBUG "mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
+               pr_debug("mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
                        err);
                 goto failed_hw;
         }
   
-       wiphy_debug(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr);
+       wiphy_dbg(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr);
   
         if (param->reg_alpha2) {
                 data->alpha2[0] = param->reg_alpha2[0];
@@@ -3067,7 -3067,7 +3067,7 @@@ static int hwsim_cloned_frame_received_
   
         return 0;
   err:
-       printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+       pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
   out:
         dev_kfree_skb(skb);
         return -EINVAL;
@@@ -3098,7 -3098,7 +3098,7 @@@ static int hwsim_register_received_nl(s
   
         hwsim_register_wmediumd(net, info->snd_portid);
   
-       printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, "
+       pr_debug("mac80211_hwsim: received a REGISTER, "
                "switching to wmediumd mode with pid %d\n", info->snd_portid);
   
         return 0;
@@@ -3387,7 -3387,7 +3387,7 @@@ static int __init hwsim_init_netlink(vo
         return 0;
   
   failure:
-       printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+       pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
         return -EINVAL;
   }
   
@@@ -3578,7 -3578,7 +3578,7 @@@ module_init(init_mac80211_hwsim)
   
   static void __exit exit_mac80211_hwsim(void)
   {
-       printk(KERN_DEBUG "mac80211_hwsim: unregister radios\n");
+       pr_debug("mac80211_hwsim: unregister radios\n");
   
         hwsim_exit_netlink();
   
diff --combined drivers/s390/net/qeth_core_main.c

index 61cf3e9c0acb80683d60e27802b259a568b0a6ac,457a4b4e82120639a68e2edc17e76262c7b89a86..49b9efeba1bda1e2390289b8ba536fa7bad0542c
--- 1/drivers/s390/net/qeth_core_main.c
--- 2/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@@ -52,7 -52,6 +52,6 @@@ EXPORT_SYMBOL_GPL(qeth_core_header_cach
   static struct kmem_cache *qeth_qdio_outbuf_cache;
   
   static struct device *qeth_core_root_dev;
- static unsigned int known_devices[][6] = QETH_MODELLIST_ARRAY;
   static struct lock_class_key qdio_out_skb_queue_key;
   static struct mutex qeth_mod_mutex;
   
@@@ -1386,6 -1385,7 +1385,7 @@@ static void qeth_init_qdio_info(struct 
         QETH_DBF_TEXT(SETUP, 4, "intqdinf");
         atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED);
         /* inbound */
+       card->qdio.no_in_queues = 1;
         card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT;
         if (card->info.type == QETH_CARD_TYPE_IQD)
                 card->qdio.init_pool.buf_count = QETH_IN_BUF_COUNT_HSDEFAULT;
@@@ -1519,34 -1519,17 +1519,17 @@@ out
         return NULL;
   }
   
- static int qeth_determine_card_type(struct qeth_card *card)
+ static void qeth_determine_card_type(struct qeth_card *card)
   {
-       int i = 0;
- 
         QETH_DBF_TEXT(SETUP, 2, "detcdtyp");
   
         card->qdio.do_prio_queueing = QETH_PRIOQ_DEFAULT;
         card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
-       while (known_devices[i][QETH_DEV_MODEL_IND]) {
-               if ((CARD_RDEV(card)->id.dev_type ==
-                               known_devices[i][QETH_DEV_TYPE_IND]) &&
-                   (CARD_RDEV(card)->id.dev_model ==
-                               known_devices[i][QETH_DEV_MODEL_IND])) {
-                       card->info.type = known_devices[i][QETH_DEV_MODEL_IND];
-                       card->qdio.no_out_queues =
-                               known_devices[i][QETH_QUEUE_NO_IND];
-                       card->qdio.no_in_queues = 1;
-                       card->info.is_multicast_different =
-                               known_devices[i][QETH_MULTICAST_IND];
-                       qeth_update_from_chp_desc(card);
-                       return 0;
-               }
-               i++;
-       }
-       card->info.type = QETH_CARD_TYPE_UNKNOWN;
-       dev_err(&card->gdev->dev, "The adapter hardware is of an "
-               "unknown type\n");
-       return -ENOENT;
+       card->info.type = CARD_RDEV(card)->id.driver_info;
+       card->qdio.no_out_queues = QETH_MAX_QUEUES;
+       if (card->info.type == QETH_CARD_TYPE_IQD)
+               card->info.is_multicast_different = 0x0103;
+       qeth_update_from_chp_desc(card);
   }
   
   static int qeth_clear_channel(struct qeth_channel *channel)
@@@ -2090,7 -2073,6 +2073,6 @@@ int qeth_send_control_data(struct qeth_
         spin_lock_irqsave(&card->lock, flags);
         list_add_tail(&reply->list, &card->cmd_waiter_list);
         spin_unlock_irqrestore(&card->lock, flags);
-       QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN);
   
         while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ;
         qeth_prepare_control_data(card, len, iob);
@@@ -2233,23 -2215,15 +2215,15 @@@ static int qeth_cm_setup(struct qeth_ca
   static int qeth_get_initial_mtu_for_card(struct qeth_card *card)
   {
         switch (card->info.type) {
-       case QETH_CARD_TYPE_UNKNOWN:
-               return 1500;
         case QETH_CARD_TYPE_IQD:
                 return card->info.max_mtu;
         case QETH_CARD_TYPE_OSD:
-               switch (card->info.link_type) {
-               case QETH_LINK_TYPE_HSTR:
-               case QETH_LINK_TYPE_LANE_TR:
-                       return 2000;
-               default:
-                       return card->options.layer2 ? 1500 : 1492;
-               }
-       case QETH_CARD_TYPE_OSM:
         case QETH_CARD_TYPE_OSX:
-               return card->options.layer2 ? 1500 : 1492;
+               if (!card->options.layer2)
+                       return ETH_DATA_LEN - 8; /* L3: allow for LLC + SNAP */
+               /* fall through */
         default:
-               return 1500;
+               return ETH_DATA_LEN;
         }
   }
   
@@@ -2279,7 -2253,6 +2253,6 @@@ static int qeth_mtu_is_valid(struct qet
                 return ((mtu >= 576) &&
                         (mtu <= card->info.max_mtu));
         case QETH_CARD_TYPE_OSN:
-       case QETH_CARD_TYPE_UNKNOWN:
         default:
                 return 1;
         }
@@@ -4040,35 -4013,23 +4013,23 @@@ static int qeth_fill_buffer(struct qeth
         return flush_cnt;
   }
   
- int qeth_do_send_packet_fast(struct qeth_card *card,
-                            struct qeth_qdio_out_q *queue, struct sk_buff *skb,
+ int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb,
                              struct qeth_hdr *hdr, unsigned int offset,
                              unsigned int hd_len)
   {
-       struct qeth_qdio_out_buffer *buffer;
-       int index;
+       int index = queue->next_buf_to_fill;
+       struct qeth_qdio_out_buffer *buffer = queue->bufs[index];
   
-       /* spin until we get the queue ... */
-       while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
-                             QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
-       /* ... now we've got the queue */
-       index = queue->next_buf_to_fill;
-       buffer = queue->bufs[queue->next_buf_to_fill];
         /*
          * check if buffer is empty to make sure that we do not 'overtake'
          * ourselves and try to fill a buffer that is already primed
          */
         if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
-               goto out;
-       queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) %
-                                         QDIO_MAX_BUFFERS_PER_Q;
-       atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
+               return -EBUSY;
+       queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q;
         qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len);
         qeth_flush_buffers(queue, index, 1);
         return 0;
- out:
-       atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
-       return -EBUSY;
   }
   EXPORT_SYMBOL_GPL(qeth_do_send_packet_fast);
   
@@@ -4923,7 -4884,6 +4884,6 @@@ static void qeth_qdio_establish_cq(stru
         if (card->options.cq == QETH_CQ_ENABLED) {
                 int offset = QDIO_MAX_BUFFERS_PER_Q *
                              (card->qdio.no_in_queues - 1);
-               i = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1);
                 for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
                         in_sbal_ptrs[offset + i] = (struct qdio_buffer *)
                                 virt_to_phys(card->qdio.c_q->bufs[i].buffer);
@@@ -5209,49 -5169,27 +5169,27 @@@ out
   }
   EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
   
- static int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer,
-                               struct qdio_buffer_element *element,
-                               struct sk_buff **pskb, int offset, int *pfrag,
-                               int data_len)
+ static void qeth_create_skb_frag(struct qdio_buffer_element *element,
+                                struct sk_buff *skb, int offset, int data_len)
   {
         struct page *page = virt_to_page(element->addr);
-       if (*pskb == NULL) {
-               if (qethbuffer->rx_skb) {
-                       /* only if qeth_card.options.cq == QETH_CQ_ENABLED */
-                       *pskb = qethbuffer->rx_skb;
-                       qethbuffer->rx_skb = NULL;
-               } else {
-                       *pskb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
-                       if (!(*pskb))
-                               return -ENOMEM;
-               }
+       unsigned int next_frag;
   
-               skb_reserve(*pskb, ETH_HLEN);
-               if (data_len <= QETH_RX_PULL_LEN) {
-                       skb_put_data(*pskb, element->addr + offset, data_len);
-               } else {
-                       get_page(page);
-                       skb_put_data(*pskb, element->addr + offset,
-                                    QETH_RX_PULL_LEN);
-                       skb_fill_page_desc(*pskb, *pfrag, page,
-                               offset + QETH_RX_PULL_LEN,
-                               data_len - QETH_RX_PULL_LEN);
-                       (*pskb)->data_len += data_len - QETH_RX_PULL_LEN;
-                       (*pskb)->len      += data_len - QETH_RX_PULL_LEN;
-                       (*pskb)->truesize += data_len - QETH_RX_PULL_LEN;
-                       (*pfrag)++;
-               }
-       } else {
-               get_page(page);
-               skb_fill_page_desc(*pskb, *pfrag, page, offset, data_len);
-               (*pskb)->data_len += data_len;
-               (*pskb)->len      += data_len;
-               (*pskb)->truesize += data_len;
-               (*pfrag)++;
-       }
+       /* first fill the linear space */
+       if (!skb->len) {
+               unsigned int linear = min(data_len, skb_tailroom(skb));
   
+               skb_put_data(skb, element->addr + offset, linear);
+               data_len -= linear;
+               if (!data_len)
+                       return;
+               offset += linear;
+               /* fall through to add page frag for remaining data */
+       }
   
-       return 0;
+       next_frag = skb_shinfo(skb)->nr_frags;
+       get_page(page);
+       skb_add_rx_frag(skb, next_frag, page, offset, data_len, data_len);
   }
   
   static inline int qeth_is_last_sbale(struct qdio_buffer_element *sbale)
@@@ -5267,22 -5205,19 +5205,19 @@@ struct sk_buff *qeth_core_get_next_skb(
         struct qdio_buffer_element *element = *__element;
         struct qdio_buffer *buffer = qethbuffer->buffer;
         int offset = *__offset;
-       struct sk_buff *skb = NULL;
+       struct sk_buff *skb;
         int skb_len = 0;
         void *data_ptr;
         int data_len;
         int headroom = 0;
         int use_rx_sg = 0;
-       int frag = 0;
   
         /* qeth_hdr must not cross element boundaries */
-       if (element->length < offset + sizeof(struct qeth_hdr)) {
+       while (element->length < offset + sizeof(struct qeth_hdr)) {
                 if (qeth_is_last_sbale(element))
                         return NULL;
                 element++;
                 offset = 0;
-               if (element->length < sizeof(struct qeth_hdr))
-                       return NULL;
         }
         *hdr = element->addr + offset;
   
@@@ -5309,27 -5244,32 +5244,32 @@@
         if (((skb_len >= card->options.rx_sg_cb) &&
              (!(card->info.type == QETH_CARD_TYPE_OSN)) &&
              (!atomic_read(&card->force_alloc_skb))) ||
-           (card->options.cq == QETH_CQ_ENABLED)) {
+           (card->options.cq == QETH_CQ_ENABLED))
                 use_rx_sg = 1;
+ 
+       if (use_rx_sg && qethbuffer->rx_skb) {
+               /* QETH_CQ_ENABLED only: */
+               skb = qethbuffer->rx_skb;
+               qethbuffer->rx_skb = NULL;
         } else {
-               skb = dev_alloc_skb(skb_len + headroom);
-               if (!skb)
-                       goto no_mem;
-               if (headroom)
-                       skb_reserve(skb, headroom);
+               unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
+ 
+               skb = dev_alloc_skb(linear + headroom);
         }
+       if (!skb)
+               goto no_mem;
+       if (headroom)
+               skb_reserve(skb, headroom);
   
         data_ptr = element->addr + offset;
         while (skb_len) {
                 data_len = min(skb_len, (int)(element->length - offset));
                 if (data_len) {
-                       if (use_rx_sg) {
-                               if (qeth_create_skb_frag(qethbuffer, element,
-                                   &skb, offset, &frag, data_len))
-                                       goto no_mem;
-                       } else {
+                       if (use_rx_sg)
+                               qeth_create_skb_frag(element, skb, offset,
+                                                    data_len);
+                       else
                                 skb_put_data(skb, data_ptr, data_len);
-                       }
                 }
                 skb_len -= data_len;
                 if (skb_len) {
@@@ -5429,7 -5369,7 +5369,7 @@@ int qeth_poll(struct napi_struct *napi
                 }
         }
   
-       napi_complete(napi);
+       napi_complete_done(napi, work_done);
         if (qdio_start_irq(card->data.ccwdev, 0))
                 napi_schedule(&card->napi);
   out:
@@@ -5737,11 -5677,7 +5677,7 @@@ static int qeth_core_probe_device(struc
         gdev->cdev[1]->handler = qeth_irq;
         gdev->cdev[2]->handler = qeth_irq;
   
-       rc = qeth_determine_card_type(card);
-       if (rc) {
-               QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc);
-               goto err_card;
-       }
+       qeth_determine_card_type(card);
         rc = qeth_setup_card(card);
         if (rc) {
                 QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc);
@@@ -5875,7 -5811,6 +5811,7 @@@ static struct ccwgroup_driver qeth_core
                 .owner = THIS_MODULE,
                 .name = "qeth",
         },
+ +      .ccw_driver = &qeth_ccw_driver,
         .setup = qeth_core_probe_device,
         .remove = qeth_core_remove_device,
         .set_online = qeth_core_set_online,
@@@ -6417,32 -6352,29 +6353,29 @@@ static int qeth_set_ipa_tso(struct qeth
         return rc;
   }
   
- /* try to restore device features on a device after recovery */
- int qeth_recover_features(struct net_device *dev)
+ #define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO)
+ 
+ /**
+  * qeth_recover_features() - Restore device features after recovery
+  * @dev:      the recovering net_device
+  *
+  * Caller must hold rtnl lock.
+  */
+ void qeth_recover_features(struct net_device *dev)
   {
+       netdev_features_t features = dev->features;
         struct qeth_card *card = dev->ml_priv;
-       netdev_features_t recover = dev->features;
   
-       if (recover & NETIF_F_IP_CSUM) {
-               if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM))
-                       recover ^= NETIF_F_IP_CSUM;
-       }
-       if (recover & NETIF_F_RXCSUM) {
-               if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM))
-                       recover ^= NETIF_F_RXCSUM;
-       }
-       if (recover & NETIF_F_TSO) {
-               if (qeth_set_ipa_tso(card, 1))
-                       recover ^= NETIF_F_TSO;
-       }
- 
-       if (recover == dev->features)
-               return 0;
+       /* force-off any feature that needs an IPA sequence.
+        * netdev_update_features() will restart them.
+        */
+       dev->features &= ~QETH_HW_FEATURES;
+       netdev_update_features(dev);
   
+       if (features == dev->features)
+               return;
         dev_warn(&card->gdev->dev,
                  "Device recovery failed to restore all offload features\n");
-       dev->features = recover;
-       return -EIO;
   }
   EXPORT_SYMBOL_GPL(qeth_recover_features);
   
@@@ -6499,8 -6431,7 +6432,7 @@@ netdev_features_t qeth_fix_features(str
         /* if the card isn't up, remove features that require hw changes */
         if (card->state == CARD_STATE_DOWN ||
             card->state == CARD_STATE_RECOVER)
-               features = features & ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
-                                       NETIF_F_TSO);
+               features &= ~QETH_HW_FEATURES;
         QETH_DBF_HEX(SETUP, 2, &features, sizeof(features));
         return features;
   }
diff --combined include/linux/dynamic_queue_limits.h

index 023eae69398c4c393ea18f698b4957c7ee7fbaf1,be12147315059730e4e7c2e0dc24d13e27beee7c..99fc06f0afc1b6c818ddc19b09ecdebc29d2bed2
--- 1/include/linux/dynamic_queue_limits.h
--- 2/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@@ -89,7 -89,7 +89,7 @@@ static inline void dql_queued(struct dq
   /* Returns how many objects can be queued, < 0 indicates over limit. */
   static inline int dql_avail(const struct dql *dql)
   {
- -      return ACCESS_ONCE(dql->adj_limit) - ACCESS_ONCE(dql->num_queued);
+ +      return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued);
   }
   
   /* Record number of completed objects and recalculate the limit. */
@@@ -99,7 -99,7 +99,7 @@@ void dql_completed(struct dql *dql, uns
   void dql_reset(struct dql *dql);
   
   /* Initialize dql state */
- int dql_init(struct dql *dql, unsigned hold_time);
+ void dql_init(struct dql *dql, unsigned int hold_time);
   
   #endif /* _KERNEL_ */
   
diff --combined include/linux/of.h

index 43b5034c6a1e0f4f931bc2ff60ee8fc50cdd8d4c,b32d418d011a977e511e95cbc9f9aa4df4394af6..d3dea1d1e3a95ff27dc56c14cc2dcd89565be1a6
--- 1/include/linux/of.h
--- 2/include/linux/of.h
+++ b/include/linux/of.h
@@@ -37,15 -37,9 +37,15 @@@ struct property 
         int     length;
         void    *value;
         struct property *next;
+ +#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC)
         unsigned long _flags;
+ +#endif
+ +#if defined(CONFIG_OF_PROMTREE)
         unsigned int unique_id;
+ +#endif
+ +#if defined(CONFIG_OF_KOBJ)
         struct bin_attribute attr;
+ +#endif
   };
   
   #if defined(CONFIG_SPARC)
@@@ -64,9 -58,7 +64,9 @@@ struct device_node 
         struct  device_node *parent;
         struct  device_node *child;
         struct  device_node *sibling;
+ +#if defined(CONFIG_OF_KOBJ)
         struct  kobject kobj;
+ +#endif
         unsigned long _flags;
         void    *data;
   #if defined(CONFIG_SPARC)
@@@ -111,17 -103,21 +111,17 @@@ extern struct kobj_type of_node_ktype
   extern const struct fwnode_operations of_fwnode_ops;
   static inline void of_node_init(struct device_node *node)
   {
+ +#if defined(CONFIG_OF_KOBJ)
         kobject_init(&node->kobj, &of_node_ktype);
+ +#endif
         node->fwnode.ops = &of_fwnode_ops;
   }
   
- -/* true when node is initialized */
- -static inline int of_node_is_initialized(struct device_node *node)
- -{
- -      return node && node->kobj.state_initialized;
- -}
- -
- -/* true when node is attached (i.e. present on sysfs) */
- -static inline int of_node_is_attached(struct device_node *node)
- -{
- -      return node && node->kobj.state_in_sysfs;
- -}
+ +#if defined(CONFIG_OF_KOBJ)
+ +#define of_node_kobj(n) (&(n)->kobj)
+ +#else
+ +#define of_node_kobj(n) NULL
+ +#endif
   
   #ifdef CONFIG_OF_DYNAMIC
   extern struct device_node *of_node_get(struct device_node *node);
@@@ -207,7 -203,6 +207,7 @@@ static inline void of_node_clear_flag(s
         clear_bit(flag, &n->_flags);
   }
   
+ +#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC)
   static inline int of_property_check_flag(struct property *p, unsigned long flag)
   {
         return test_bit(flag, &p->_flags);
@@@ -222,7 -217,6 +222,7 @@@ static inline void of_property_clear_fl
   {
         clear_bit(flag, &p->_flags);
   }
+ +#endif
   
   extern struct device_node *__of_find_all_nodes(struct device_node *prev);
   extern struct device_node *of_find_all_nodes(struct device_node *prev);
@@@ -681,12 -675,6 +681,6 @@@ static inline int of_property_count_ele
         return -ENOSYS;
   }
   
- static inline int of_property_read_u32_index(const struct device_node *np,
-                       const char *propname, u32 index, u32 *out_value)
- {
-       return -ENOSYS;
- }
- 
   static inline int of_property_read_u8_array(const struct device_node *np,
                         const char *propname, u8 *out_values, size_t sz)
   {
@@@ -713,16 -701,14 +707,14 @@@ static inline int of_property_read_u64_
         return -ENOSYS;
   }
   
- static inline int of_property_read_string(const struct device_node *np,
-                                         const char *propname,
-                                         const char **out_string)
+ static inline int of_property_read_u32_index(const struct device_node *np,
+                       const char *propname, u32 index, u32 *out_value)
   {
         return -ENOSYS;
   }
   
- static inline int of_property_read_string_helper(const struct device_node *np,
-                                                const char *propname,
-                                                const char **out_strs, size_t sz, int index)
+ static inline int of_property_read_u64_index(const struct device_node *np,
+                       const char *propname, u32 index, u64 *out_value)
   {
         return -ENOSYS;
   }
@@@ -750,12 -736,51 +742,51 @@@ static inline int of_n_size_cells(struc
         return 0;
   }
   
+ static inline int of_property_read_variable_u8_array(const struct device_node *np,
+                                       const char *propname, u8 *out_values,
+                                       size_t sz_min, size_t sz_max)
+ {
+       return -ENOSYS;
+ }
+ 
+ static inline int of_property_read_variable_u16_array(const struct device_node *np,
+                                       const char *propname, u16 *out_values,
+                                       size_t sz_min, size_t sz_max)
+ {
+       return -ENOSYS;
+ }
+ 
+ static inline int of_property_read_variable_u32_array(const struct device_node *np,
+                                       const char *propname,
+                                       u32 *out_values,
+                                       size_t sz_min,
+                                       size_t sz_max)
+ {
+       return -ENOSYS;
+ }
+ 
   static inline int of_property_read_u64(const struct device_node *np,
                                        const char *propname, u64 *out_value)
   {
         return -ENOSYS;
   }
   
+ static inline int of_property_read_variable_u64_array(const struct device_node *np,
+                                       const char *propname,
+                                       u64 *out_values,
+                                       size_t sz_min,
+                                       size_t sz_max)
+ {
+       return -ENOSYS;
+ }
+ 
+ static inline int of_property_read_string(const struct device_node *np,
+                                         const char *propname,
+                                         const char **out_string)
+ {
+       return -ENOSYS;
+ }
+ 
   static inline int of_property_match_string(const struct device_node *np,
                                            const char *propname,
                                            const char *string)
@@@ -763,6 -788,13 +794,13 @@@
         return -ENOSYS;
   }
   
+ static inline int of_property_read_string_helper(const struct device_node *np,
+                                                const char *propname,
+                                                const char **out_strs, size_t sz, int index)
+ {
+       return -ENOSYS;
+ }
+ 
   static inline struct device_node *of_parse_phandle(const struct device_node *np,
                                                    const char *phandle_name,
                                                    int index)
@@@ -1289,6 -1321,9 +1327,6 @@@ static inline int of_reconfig_get_state
   }
   #endif /* CONFIG_OF_DYNAMIC */
   
- -/* CONFIG_OF_RESOLVE api */
- -extern int of_resolve_phandles(struct device_node *tree);
- -
   /**
    * of_device_is_system_power_controller - Tells if system-power-controller is found for device_node
    * @np: Pointer to the given device_node
@@@ -1305,7 -1340,7 +1343,7 @@@ static inline bool of_device_is_system_
    */
   
   enum of_overlay_notify_action {
- -      OF_OVERLAY_PRE_APPLY,
+ +      OF_OVERLAY_PRE_APPLY = 0,
         OF_OVERLAY_POST_APPLY,
         OF_OVERLAY_PRE_REMOVE,
         OF_OVERLAY_POST_REMOVE,
@@@ -1319,26 -1354,26 +1357,26 @@@ struct of_overlay_notify_data 
   #ifdef CONFIG_OF_OVERLAY
   
   /* ID based overlays; the API for external users */
- -int of_overlay_create(struct device_node *tree);
- -int of_overlay_destroy(int id);
- -int of_overlay_destroy_all(void);
+ +int of_overlay_apply(struct device_node *tree, int *ovcs_id);
+ +int of_overlay_remove(int *ovcs_id);
+ +int of_overlay_remove_all(void);
   
   int of_overlay_notifier_register(struct notifier_block *nb);
   int of_overlay_notifier_unregister(struct notifier_block *nb);
   
   #else
   
- -static inline int of_overlay_create(struct device_node *tree)
+ +static inline int of_overlay_apply(struct device_node *tree, int *ovcs_id)
   {
         return -ENOTSUPP;
   }
   
- -static inline int of_overlay_destroy(int id)
+ +static inline int of_overlay_remove(int *ovcs_id)
   {
         return -ENOTSUPP;
   }
   
- -static inline int of_overlay_destroy_all(void)
+ +static inline int of_overlay_remove_all(void)
   {
         return -ENOTSUPP;
   }
diff --combined include/linux/rtnetlink.h

index 54bcd970bfd3c9586ac2be2d836ebe72f18261bf,d090d466cd0bbe09b5bc8141c6de55ac8fa13a56..2032ce2eb20bff492698a1309aa043470de0991f
--- 1/include/linux/rtnetlink.h
--- 2/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@@ -18,9 -18,11 +18,11 @@@ extern int rtnl_put_cacheinfo(struct sk
                               u32 id, long expires, u32 error);
   
   void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
+ void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+                        gfp_t flags, int *new_nsid);
   struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
                                        unsigned change, u32 event,
-                                      gfp_t flags);
+                                      gfp_t flags, int *new_nsid);
   void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev,
                        gfp_t flags);
   
@@@ -68,7 -70,7 +70,7 @@@ static inline bool lockdep_rtnl_is_held
    * @p: The pointer to read, prior to dereferencing
    *
    * Return the value of the specified RCU-protected pointer, but omit
- - * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
+ + * both the smp_read_barrier_depends() and the READ_ONCE(), because
    * caller holds RTNL.
    */
   #define rtnl_dereference(p)                                   \
diff --combined include/net/netfilter/nf_tables.h

index 470c1c71e7f4443e296f031e92d4743385a4610e,01570a8f298217bc08c8972e2202e0ce9f5739e3..fecc6112c768a7aa0acf711fd89130eaf53d7fd1
--- 1/include/net/netfilter/nf_tables.h
--- 2/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@@ -312,6 -312,7 +312,7 @@@ struct nft_expr
    *    @flush: deactivate element in the next generation
    *    @remove: remove element from set
    *    @walk: iterate over all set elements
+  *    @get: get set elements
    *    @privsize: function to return size of set private data
    *    @init: initialize private data of new set instance
    *    @destroy: destroy private data of set instance
@@@ -351,6 -352,10 +352,10 @@@ struct nft_set_ops 
         void                            (*walk)(const struct nft_ctx *ctx,
                                                 struct nft_set *set,
                                                 struct nft_set_iter *iter);
+       void *                          (*get)(const struct net *net,
+                                              const struct nft_set *set,
+                                              const struct nft_set_elem *elem,
+                                              unsigned int flags);
   
         unsigned int                    (*privsize)(const struct nlattr * const nla[],
                                                     const struct nft_set_desc *desc);
@@@ -1165,8 -1170,8 +1170,8 @@@ static inline u8 nft_genmask_next(cons
   
   static inline u8 nft_genmask_cur(const struct net *net)
   {
- -      /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */
- -      return 1 << ACCESS_ONCE(net->nft.gencursor);
+ +      /* Use READ_ONCE() to prevent refetching the value for atomicity */
+ +      return 1 << READ_ONCE(net->nft.gencursor);
   }
   
   #define NFT_GENMASK_ANY               ((1 << 0) | (1 << 1))
diff --combined kernel/events/core.c

index 4c39c05e029a7f357cb872ffa982d6ca2381fddb,42d24bd64ea4676ba740495f58504918c6db315d..3939a4674e0ae48395f290edc20d61f4c307c89b
--- 1/kernel/events/core.c
--- 2/kernel/events/core.c
+++ b/kernel/events/core.c
@@@ -209,7 -209,7 +209,7 @@@ static int event_function(void *info
         struct perf_event_context *task_ctx = cpuctx->task_ctx;
         int ret = 0;
   
- -      WARN_ON_ONCE(!irqs_disabled());
+ +      lockdep_assert_irqs_disabled();
   
         perf_ctx_lock(cpuctx, task_ctx);
         /*
@@@ -306,7 -306,7 +306,7 @@@ static void event_function_local(struc
         struct task_struct *task = READ_ONCE(ctx->task);
         struct perf_event_context *task_ctx = NULL;
   
- -      WARN_ON_ONCE(!irqs_disabled());
+ +      lockdep_assert_irqs_disabled();
   
         if (task) {
                 if (task == TASK_TOMBSTONE)
@@@ -582,88 -582,6 +582,88 @@@ static inline u64 perf_event_clock(stru
         return event->clock();
   }
   
+ +/*
+ + * State based event timekeeping...
+ + *
+ + * The basic idea is to use event->state to determine which (if any) time
+ + * fields to increment with the current delta. This means we only need to
+ + * update timestamps when we change state or when they are explicitly requested
+ + * (read).
+ + *
+ + * Event groups make things a little more complicated, but not terribly so. The
+ + * rules for a group are that if the group leader is OFF the entire group is
+ + * OFF, irrespective of what the group member states are. This results in
+ + * __perf_effective_state().
+ + *
+ + * A further ramification is that when a group leader flips between OFF and
+ + * !OFF, we need to update all group member times.
+ + *
+ + *
+ + * NOTE: perf_event_time() is based on the (cgroup) context time, and thus we
+ + * need to make sure the relevant context time is updated before we try and
+ + * update our timestamps.
+ + */
+ +
+ +static __always_inline enum perf_event_state
+ +__perf_effective_state(struct perf_event *event)
+ +{
+ +      struct perf_event *leader = event->group_leader;
+ +
+ +      if (leader->state <= PERF_EVENT_STATE_OFF)
+ +              return leader->state;
+ +
+ +      return event->state;
+ +}
+ +
+ +static __always_inline void
+ +__perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
+ +{
+ +      enum perf_event_state state = __perf_effective_state(event);
+ +      u64 delta = now - event->tstamp;
+ +
+ +      *enabled = event->total_time_enabled;
+ +      if (state >= PERF_EVENT_STATE_INACTIVE)
+ +              *enabled += delta;
+ +
+ +      *running = event->total_time_running;
+ +      if (state >= PERF_EVENT_STATE_ACTIVE)
+ +              *running += delta;
+ +}
+ +
+ +static void perf_event_update_time(struct perf_event *event)
+ +{
+ +      u64 now = perf_event_time(event);
+ +
+ +      __perf_update_times(event, now, &event->total_time_enabled,
+ +                                      &event->total_time_running);
+ +      event->tstamp = now;
+ +}
+ +
+ +static void perf_event_update_sibling_time(struct perf_event *leader)
+ +{
+ +      struct perf_event *sibling;
+ +
+ +      list_for_each_entry(sibling, &leader->sibling_list, group_entry)
+ +              perf_event_update_time(sibling);
+ +}
+ +
+ +static void
+ +perf_event_set_state(struct perf_event *event, enum perf_event_state state)
+ +{
+ +      if (event->state == state)
+ +              return;
+ +
+ +      perf_event_update_time(event);
+ +      /*
+ +       * If a group leader gets enabled/disabled all its siblings
+ +       * are affected too.
+ +       */
+ +      if ((event->state < 0) ^ (state < 0))
+ +              perf_event_update_sibling_time(event);
+ +
+ +      WRITE_ONCE(event->state, state);
+ +}
+ +
   #ifdef CONFIG_CGROUP_PERF
   
   static inline bool
@@@ -923,6 -841,40 +923,6 @@@ perf_cgroup_set_shadow_time(struct perf
         event->shadow_ctx_time = now - t->timestamp;
   }
   
- -static inline void
- -perf_cgroup_defer_enabled(struct perf_event *event)
- -{
- -      /*
- -       * when the current task's perf cgroup does not match
- -       * the event's, we need to remember to call the
- -       * perf_mark_enable() function the first time a task with
- -       * a matching perf cgroup is scheduled in.
- -       */
- -      if (is_cgroup_event(event) && !perf_cgroup_match(event))
- -              event->cgrp_defer_enabled = 1;
- -}
- -
- -static inline void
- -perf_cgroup_mark_enabled(struct perf_event *event,
- -                       struct perf_event_context *ctx)
- -{
- -      struct perf_event *sub;
- -      u64 tstamp = perf_event_time(event);
- -
- -      if (!event->cgrp_defer_enabled)
- -              return;
- -
- -      event->cgrp_defer_enabled = 0;
- -
- -      event->tstamp_enabled = tstamp - event->total_time_enabled;
- -      list_for_each_entry(sub, &event->sibling_list, group_entry) {
- -              if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
- -                      sub->tstamp_enabled = tstamp - sub->total_time_enabled;
- -                      sub->cgrp_defer_enabled = 0;
- -              }
- -      }
- -}
- -
   /*
    * Update cpuctx->cgrp so that it is set when first cgroup event is added and
    * cleared when last cgroup event is removed.
@@@ -1022,6 -974,17 +1022,6 @@@ static inline u64 perf_cgroup_event_tim
         return 0;
   }
   
- -static inline void
- -perf_cgroup_defer_enabled(struct perf_event *event)
- -{
- -}
- -
- -static inline void
- -perf_cgroup_mark_enabled(struct perf_event *event,
- -                       struct perf_event_context *ctx)
- -{
- -}
- -
   static inline void
   list_update_cgroup_event(struct perf_event *event,
                          struct perf_event_context *ctx, bool add)
@@@ -1043,7 -1006,7 +1043,7 @@@ static enum hrtimer_restart perf_mux_hr
         struct perf_cpu_context *cpuctx;
         int rotations = 0;
   
- -      WARN_ON(!irqs_disabled());
+ +      lockdep_assert_irqs_disabled();
   
         cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
         rotations = perf_rotate_context(cpuctx);
@@@ -1130,7 -1093,7 +1130,7 @@@ static void perf_event_ctx_activate(str
   {
         struct list_head *head = this_cpu_ptr(&active_ctx_list);
   
- -      WARN_ON(!irqs_disabled());
+ +      lockdep_assert_irqs_disabled();
   
         WARN_ON(!list_empty(&ctx->active_ctx_list));
   
@@@ -1139,7 -1102,7 +1139,7 @@@
   
   static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
   {
- -      WARN_ON(!irqs_disabled());
+ +      lockdep_assert_irqs_disabled();
   
         WARN_ON(list_empty(&ctx->active_ctx_list));
   
@@@ -1239,7 -1202,7 +1239,7 @@@ perf_event_ctx_lock_nested(struct perf_
   
   again:
         rcu_read_lock();
- -      ctx = ACCESS_ONCE(event->ctx);
+ +      ctx = READ_ONCE(event->ctx);
         if (!atomic_inc_not_zero(&ctx->refcount)) {
                 rcu_read_unlock();
                 goto again;
@@@ -1435,6 -1398,60 +1435,6 @@@ static u64 perf_event_time(struct perf_
         return ctx ? ctx->time : 0;
   }
   
- -/*
- - * Update the total_time_enabled and total_time_running fields for a event.
- - */
- -static void update_event_times(struct perf_event *event)
- -{
- -      struct perf_event_context *ctx = event->ctx;
- -      u64 run_end;
- -
- -      lockdep_assert_held(&ctx->lock);
- -
- -      if (event->state < PERF_EVENT_STATE_INACTIVE ||
- -          event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
- -              return;
- -
- -      /*
- -       * in cgroup mode, time_enabled represents
- -       * the time the event was enabled AND active
- -       * tasks were in the monitored cgroup. This is
- -       * independent of the activity of the context as
- -       * there may be a mix of cgroup and non-cgroup events.
- -       *
- -       * That is why we treat cgroup events differently
- -       * here.
- -       */
- -      if (is_cgroup_event(event))
- -              run_end = perf_cgroup_event_time(event);
- -      else if (ctx->is_active)
- -              run_end = ctx->time;
- -      else
- -              run_end = event->tstamp_stopped;
- -
- -      event->total_time_enabled = run_end - event->tstamp_enabled;
- -
- -      if (event->state == PERF_EVENT_STATE_INACTIVE)
- -              run_end = event->tstamp_stopped;
- -      else
- -              run_end = perf_event_time(event);
- -
- -      event->total_time_running = run_end - event->tstamp_running;
- -
- -}
- -
- -/*
- - * Update total_time_enabled and total_time_running for all events in a group.
- - */
- -static void update_group_times(struct perf_event *leader)
- -{
- -      struct perf_event *event;
- -
- -      update_event_times(leader);
- -      list_for_each_entry(event, &leader->sibling_list, group_entry)
- -              update_event_times(event);
- -}
- -
   static enum event_type_t get_event_type(struct perf_event *event)
   {
         struct perf_event_context *ctx = event->ctx;
@@@ -1477,8 -1494,6 +1477,8 @@@ list_add_event(struct perf_event *event
         WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
         event->attach_state |= PERF_ATTACH_CONTEXT;
   
+ +      event->tstamp = perf_event_time(event);
+ +
         /*
          * If we're a stand alone event or group leader, we go to the context
          * list, group events are kept attached to the group so that
@@@ -1686,6 -1701,8 +1686,6 @@@ list_del_event(struct perf_event *event
         if (event->group_leader == event)
                 list_del_init(&event->group_entry);
   
- -      update_group_times(event);
- -
         /*
          * If event was in error state, then keep it
          * that way, otherwise bogus counts will be
@@@ -1694,7 -1711,7 +1694,7 @@@
          * of the event
          */
         if (event->state > PERF_EVENT_STATE_OFF)
- -              event->state = PERF_EVENT_STATE_OFF;
+ +              perf_event_set_state(event, PERF_EVENT_STATE_OFF);
   
         ctx->generation++;
   }
@@@ -1793,24 -1810,38 +1793,24 @@@ event_sched_out(struct perf_event *even
                   struct perf_cpu_context *cpuctx,
                   struct perf_event_context *ctx)
   {
- -      u64 tstamp = perf_event_time(event);
- -      u64 delta;
+ +      enum perf_event_state state = PERF_EVENT_STATE_INACTIVE;
   
         WARN_ON_ONCE(event->ctx != ctx);
         lockdep_assert_held(&ctx->lock);
   
- -      /*
- -       * An event which could not be activated because of
- -       * filter mismatch still needs to have its timings
- -       * maintained, otherwise bogus information is return
- -       * via read() for time_enabled, time_running:
- -       */
- -      if (event->state == PERF_EVENT_STATE_INACTIVE &&
- -          !event_filter_match(event)) {
- -              delta = tstamp - event->tstamp_stopped;
- -              event->tstamp_running += delta;
- -              event->tstamp_stopped = tstamp;
- -      }
- -
         if (event->state != PERF_EVENT_STATE_ACTIVE)
                 return;
   
         perf_pmu_disable(event->pmu);
   
- -      event->tstamp_stopped = tstamp;
         event->pmu->del(event, 0);
         event->oncpu = -1;
- -      event->state = PERF_EVENT_STATE_INACTIVE;
+ +
         if (event->pending_disable) {
                 event->pending_disable = 0;
- -              event->state = PERF_EVENT_STATE_OFF;
+ +              state = PERF_EVENT_STATE_OFF;
         }
+ +      perf_event_set_state(event, state);
   
         if (!is_software_event(event))
                 cpuctx->active_oncpu--;
@@@ -1830,9 -1861,7 +1830,9 @@@ group_sched_out(struct perf_event *grou
                 struct perf_event_context *ctx)
   {
         struct perf_event *event;
- -      int state = group_event->state;
+ +
+ +      if (group_event->state != PERF_EVENT_STATE_ACTIVE)
+ +              return;
   
         perf_pmu_disable(ctx->pmu);
   
@@@ -1846,7 -1875,7 +1846,7 @@@
   
         perf_pmu_enable(ctx->pmu);
   
- -      if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
+ +      if (group_event->attr.exclusive)
                 cpuctx->exclusive = 0;
   }
   
@@@ -1866,11 -1895,6 +1866,11 @@@ __perf_remove_from_context(struct perf_
   {
         unsigned long flags = (unsigned long)info;
   
+ +      if (ctx->is_active & EVENT_TIME) {
+ +              update_context_time(ctx);
+ +              update_cgrp_time_from_cpuctx(cpuctx);
+ +      }
+ +
         event_sched_out(event, cpuctx, ctx);
         if (flags & DETACH_GROUP)
                 perf_group_detach(event);
@@@ -1933,17 -1957,14 +1933,17 @@@ static void __perf_event_disable(struc
         if (event->state < PERF_EVENT_STATE_INACTIVE)
                 return;
   
- -      update_context_time(ctx);
- -      update_cgrp_time_from_event(event);
- -      update_group_times(event);
+ +      if (ctx->is_active & EVENT_TIME) {
+ +              update_context_time(ctx);
+ +              update_cgrp_time_from_event(event);
+ +      }
+ +
         if (event == event->group_leader)
                 group_sched_out(event, cpuctx, ctx);
         else
                 event_sched_out(event, cpuctx, ctx);
- -      event->state = PERF_EVENT_STATE_OFF;
+ +
+ +      perf_event_set_state(event, PERF_EVENT_STATE_OFF);
   }
   
   /*
@@@ -2000,7 -2021,8 +2000,7 @@@ void perf_event_disable_inatomic(struc
   }
   
   static void perf_set_shadow_time(struct perf_event *event,
- -                               struct perf_event_context *ctx,
- -                               u64 tstamp)
+ +                               struct perf_event_context *ctx)
   {
         /*
          * use the correct time source for the time snapshot
@@@ -2028,9 -2050,9 +2028,9 @@@
          * is cleaner and simpler to understand.
          */
         if (is_cgroup_event(event))
- -              perf_cgroup_set_shadow_time(event, tstamp);
+ +              perf_cgroup_set_shadow_time(event, event->tstamp);
         else
- -              event->shadow_ctx_time = tstamp - ctx->timestamp;
+ +              event->shadow_ctx_time = event->tstamp - ctx->timestamp;
   }
   
   #define MAX_INTERRUPTS (~0ULL)
@@@ -2043,6 -2065,7 +2043,6 @@@ event_sched_in(struct perf_event *event
                  struct perf_cpu_context *cpuctx,
                  struct perf_event_context *ctx)
   {
- -      u64 tstamp = perf_event_time(event);
         int ret = 0;
   
         lockdep_assert_held(&ctx->lock);
@@@ -2052,12 -2075,11 +2052,12 @@@
   
         WRITE_ONCE(event->oncpu, smp_processor_id());
         /*
- -       * Order event::oncpu write to happen before the ACTIVE state
- -       * is visible.
+ +       * Order event::oncpu write to happen before the ACTIVE state is
+ +       * visible. This allows perf_event_{stop,read}() to observe the correct
+ +       * ->oncpu if it sees ACTIVE.
          */
         smp_wmb();
- -      WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
+ +      perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE);
   
         /*
          * Unthrottle events, since we scheduled we might have missed several
@@@ -2069,19 -2091,26 +2069,19 @@@
                 event->hw.interrupts = 0;
         }
   
- -      /*
- -       * The new state must be visible before we turn it on in the hardware:
- -       */
- -      smp_wmb();
- -
         perf_pmu_disable(event->pmu);
   
- -      perf_set_shadow_time(event, ctx, tstamp);
+ +      perf_set_shadow_time(event, ctx);
   
         perf_log_itrace_start(event);
   
         if (event->pmu->add(event, PERF_EF_START)) {
- -              event->state = PERF_EVENT_STATE_INACTIVE;
+ +              perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
                 event->oncpu = -1;
                 ret = -EAGAIN;
                 goto out;
         }
   
- -      event->tstamp_running += tstamp - event->tstamp_stopped;
- -
         if (!is_software_event(event))
                 cpuctx->active_oncpu++;
         if (!ctx->nr_active++)
@@@ -2105,6 -2134,8 +2105,6 @@@ group_sched_in(struct perf_event *group
   {
         struct perf_event *event, *partial_group = NULL;
         struct pmu *pmu = ctx->pmu;
- -      u64 now = ctx->time;
- -      bool simulate = false;
   
         if (group_event->state == PERF_EVENT_STATE_OFF)
                 return 0;
@@@ -2134,13 -2165,27 +2134,13 @@@ group_error
         /*
          * Groups can be scheduled in as one unit only, so undo any
          * partial group before returning:
- -       * The events up to the failed event are scheduled out normally,
- -       * tstamp_stopped will be updated.
- -       *
- -       * The failed events and the remaining siblings need to have
- -       * their timings updated as if they had gone thru event_sched_in()
- -       * and event_sched_out(). This is required to get consistent timings
- -       * across the group. This also takes care of the case where the group
- -       * could never be scheduled by ensuring tstamp_stopped is set to mark
- -       * the time the event was actually stopped, such that time delta
- -       * calculation in update_event_times() is correct.
+ +       * The events up to the failed event are scheduled out normally.
          */
         list_for_each_entry(event, &group_event->sibling_list, group_entry) {
                 if (event == partial_group)
- -                      simulate = true;
+ +                      break;
   
- -              if (simulate) {
- -                      event->tstamp_running += now - event->tstamp_stopped;
- -                      event->tstamp_stopped = now;
- -              } else {
- -                      event_sched_out(event, cpuctx, ctx);
- -              }
+ +              event_sched_out(event, cpuctx, ctx);
         }
         event_sched_out(group_event, cpuctx, ctx);
   
@@@ -2182,11 -2227,46 +2182,11 @@@ static int group_can_go_on(struct perf_
         return can_add_hw;
   }
   
- -/*
- - * Complement to update_event_times(). This computes the tstamp_* values to
- - * continue 'enabled' state from @now, and effectively discards the time
- - * between the prior tstamp_stopped and now (as we were in the OFF state, or
- - * just switched (context) time base).
- - *
- - * This further assumes '@event->state == INACTIVE' (we just came from OFF) and
- - * cannot have been scheduled in yet. And going into INACTIVE state means
- - * '@event->tstamp_stopped = @now'.
- - *
- - * Thus given the rules of update_event_times():
- - *
- - *   total_time_enabled = tstamp_stopped - tstamp_enabled
- - *   total_time_running = tstamp_stopped - tstamp_running
- - *
- - * We can insert 'tstamp_stopped == now' and reverse them to compute new
- - * tstamp_* values.
- - */
- -static void __perf_event_enable_time(struct perf_event *event, u64 now)
- -{
- -      WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE);
- -
- -      event->tstamp_stopped = now;
- -      event->tstamp_enabled = now - event->total_time_enabled;
- -      event->tstamp_running = now - event->total_time_running;
- -}
- -
   static void add_event_to_ctx(struct perf_event *event,
                                struct perf_event_context *ctx)
   {
- -      u64 tstamp = perf_event_time(event);
- -
         list_add_event(event, ctx);
         perf_group_attach(event);
- -      /*
- -       * We can be called with event->state == STATE_OFF when we create with
- -       * .disabled = 1. In that case the IOC_ENABLE will call this function.
- -       */
- -      if (event->state == PERF_EVENT_STATE_INACTIVE)
- -              __perf_event_enable_time(event, tstamp);
   }
   
   static void ctx_sched_out(struct perf_event_context *ctx,
@@@ -2417,6 -2497,28 +2417,6 @@@ again
         raw_spin_unlock_irq(&ctx->lock);
   }
   
- -/*
- - * Put a event into inactive state and update time fields.
- - * Enabling the leader of a group effectively enables all
- - * the group members that aren't explicitly disabled, so we
- - * have to update their ->tstamp_enabled also.
- - * Note: this works for group members as well as group leaders
- - * since the non-leader members' sibling_lists will be empty.
- - */
- -static void __perf_event_mark_enabled(struct perf_event *event)
- -{
- -      struct perf_event *sub;
- -      u64 tstamp = perf_event_time(event);
- -
- -      event->state = PERF_EVENT_STATE_INACTIVE;
- -      __perf_event_enable_time(event, tstamp);
- -      list_for_each_entry(sub, &event->sibling_list, group_entry) {
- -              /* XXX should not be > INACTIVE if event isn't */
- -              if (sub->state >= PERF_EVENT_STATE_INACTIVE)
- -                      __perf_event_enable_time(sub, tstamp);
- -      }
- -}
- -
   /*
    * Cross CPU call to enable a performance event
    */
@@@ -2435,12 -2537,14 +2435,12 @@@ static void __perf_event_enable(struct 
         if (ctx->is_active)
                 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
   
- -      __perf_event_mark_enabled(event);
+ +      perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
   
         if (!ctx->is_active)
                 return;
   
         if (!event_filter_match(event)) {
- -              if (is_cgroup_event(event))
- -                      perf_cgroup_defer_enabled(event);
                 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
                 return;
         }
@@@ -2760,10 -2864,18 +2760,10 @@@ static void __perf_event_sync_stat(stru
          * we know the event must be on the current CPU, therefore we
          * don't need to use it.
          */
- -      switch (event->state) {
- -      case PERF_EVENT_STATE_ACTIVE:
+ +      if (event->state == PERF_EVENT_STATE_ACTIVE)
                 event->pmu->read(event);
- -              /* fall-through */
   
- -      case PERF_EVENT_STATE_INACTIVE:
- -              update_event_times(event);
- -              break;
- -
- -      default:
- -              break;
- -      }
+ +      perf_event_update_time(event);
   
         /*
          * In order to keep per-task stats reliable we need to flip the event
@@@ -3000,6 -3112,10 +3000,6 @@@ ctx_pinned_sched_in(struct perf_event_c
                 if (!event_filter_match(event))
                         continue;
   
- -              /* may need to reset tstamp_enabled */
- -              if (is_cgroup_event(event))
- -                      perf_cgroup_mark_enabled(event, ctx);
- -
                 if (group_can_go_on(event, cpuctx, 1))
                         group_sched_in(event, cpuctx, ctx);
   
@@@ -3007,8 -3123,10 +3007,8 @@@
                  * If this pinned group hasn't been scheduled,
                  * put it in error state.
                  */
- -              if (event->state == PERF_EVENT_STATE_INACTIVE) {
- -                      update_group_times(event);
- -                      event->state = PERF_EVENT_STATE_ERROR;
- -              }
+ +              if (event->state == PERF_EVENT_STATE_INACTIVE)
+ +                      perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
         }
   }
   
@@@ -3030,6 -3148,10 +3030,6 @@@ ctx_flexible_sched_in(struct perf_event
                 if (!event_filter_match(event))
                         continue;
   
- -              /* may need to reset tstamp_enabled */
- -              if (is_cgroup_event(event))
- -                      perf_cgroup_mark_enabled(event, ctx);
- -
                 if (group_can_go_on(event, cpuctx, can_add_hw)) {
                         if (group_sched_in(event, cpuctx, ctx))
                                 can_add_hw = 0;
@@@ -3401,7 -3523,7 +3401,7 @@@ void perf_event_task_tick(void
         struct perf_event_context *ctx, *tmp;
         int throttled;
   
- -      WARN_ON(!irqs_disabled());
+ +      lockdep_assert_irqs_disabled();
   
         __this_cpu_inc(perf_throttled_seq);
         throttled = __this_cpu_xchg(perf_throttled_count, 0);
@@@ -3421,7 -3543,7 +3421,7 @@@ static int event_enable_on_exec(struct 
         if (event->state >= PERF_EVENT_STATE_INACTIVE)
                 return 0;
   
- -      __perf_event_mark_enabled(event);
+ +      perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
   
         return 1;
   }
@@@ -3515,15 -3637,12 +3515,15 @@@ static void __perf_event_read(void *inf
                 return;
   
         raw_spin_lock(&ctx->lock);
- -      if (ctx->is_active) {
+ +      if (ctx->is_active & EVENT_TIME) {
                 update_context_time(ctx);
                 update_cgrp_time_from_event(event);
         }
   
- -      update_event_times(event);
+ +      perf_event_update_time(event);
+ +      if (data->group)
+ +              perf_event_update_sibling_time(event);
+ +
         if (event->state != PERF_EVENT_STATE_ACTIVE)
                 goto unlock;
   
@@@ -3538,6 -3657,7 +3538,6 @@@
         pmu->read(event);
   
         list_for_each_entry(sub, &event->sibling_list, group_entry) {
- -              update_event_times(sub);
                 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
                         /*
                          * Use sibling's PMU rather than @event's since
@@@ -3571,6 -3691,7 +3571,6 @@@ int perf_event_read_local(struct perf_e
   {
         unsigned long flags;
         int ret = 0;
- -      u64 now;
   
         /*
          * Disabling interrupts avoids all counter scheduling (context
@@@ -3601,26 -3722,23 +3601,25 @@@
                 goto out;
         }
   
- 
- -      now = event->shadow_ctx_time + perf_clock();
- -      if (enabled)
- -              *enabled = now - event->tstamp_enabled;
         /*
          * If the event is currently on this CPU, its either a per-task event,
          * or local to this CPU. Furthermore it means its ACTIVE (otherwise
          * oncpu == -1).
          */
- -      if (event->oncpu == smp_processor_id()) {
+ +      if (event->oncpu == smp_processor_id())
                 event->pmu->read(event);
- -              if (running)
- -                      *running = now - event->tstamp_running;
- -      } else if (running) {
- -              *running = event->total_time_running;
- -      }
   
         *value = local64_read(&event->count);
+ +      if (enabled || running) {
+ +              u64 now = event->shadow_ctx_time + perf_clock();
+ +              u64 __enabled, __running;
+ +
+ +              __perf_update_times(event, now, &__enabled, &__running);
+ +              if (enabled)
+ +                      *enabled = __enabled;
+ +              if (running)
+ +                      *running = __running;
+ +      }
   out:
         local_irq_restore(flags);
   
@@@ -3629,35 -3747,23 +3628,35 @@@
   
   static int perf_event_read(struct perf_event *event, bool group)
   {
+ +      enum perf_event_state state = READ_ONCE(event->state);
         int event_cpu, ret = 0;
   
         /*
          * If event is enabled and currently active on a CPU, update the
          * value in the event structure:
          */
- -      if (event->state == PERF_EVENT_STATE_ACTIVE) {
- -              struct perf_read_data data = {
- -                      .event = event,
- -                      .group = group,
- -                      .ret = 0,
- -              };
+ +again:
+ +      if (state == PERF_EVENT_STATE_ACTIVE) {
+ +              struct perf_read_data data;
+ +
+ +              /*
+ +               * Orders the ->state and ->oncpu loads such that if we see
+ +               * ACTIVE we must also see the right ->oncpu.
+ +               *
+ +               * Matches the smp_wmb() from event_sched_in().
+ +               */
+ +              smp_rmb();
   
                 event_cpu = READ_ONCE(event->oncpu);
                 if ((unsigned)event_cpu >= nr_cpu_ids)
                         return 0;
   
+ +              data = (struct perf_read_data){
+ +                      .event = event,
+ +                      .group = group,
+ +                      .ret = 0,
+ +              };
+ +
                 preempt_disable();
                 event_cpu = __perf_event_read_cpu(event, event_cpu);
   
@@@ -3674,30 -3780,24 +3673,30 @@@
                 (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
                 preempt_enable();
                 ret = data.ret;
- -      } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ +
+ +      } else if (state == PERF_EVENT_STATE_INACTIVE) {
                 struct perf_event_context *ctx = event->ctx;
                 unsigned long flags;
   
                 raw_spin_lock_irqsave(&ctx->lock, flags);
+ +              state = event->state;
+ +              if (state != PERF_EVENT_STATE_INACTIVE) {
+ +                      raw_spin_unlock_irqrestore(&ctx->lock, flags);
+ +                      goto again;
+ +              }
+ +
                 /*
- -               * may read while context is not active
- -               * (e.g., thread is blocked), in that case
- -               * we cannot update context time
+ +               * May read while context is not active (e.g., thread is
+ +               * blocked), in that case we cannot update context time
                  */
- -              if (ctx->is_active) {
+ +              if (ctx->is_active & EVENT_TIME) {
                         update_context_time(ctx);
                         update_cgrp_time_from_event(event);
                 }
+ +
+ +              perf_event_update_time(event);
                 if (group)
- -                      update_group_times(event);
- -              else
- -                      update_event_times(event);
+ +                      perf_event_update_sibling_time(event);
                 raw_spin_unlock_irqrestore(&ctx->lock, flags);
         }
   
@@@ -4143,7 -4243,7 +4142,7 @@@ static void perf_remove_from_owner(stru
          * indeed free this event, otherwise we need to serialize on
          * owner->perf_event_mutex.
          */
- -      owner = lockless_dereference(event->owner);
+ +      owner = READ_ONCE(event->owner);
         if (owner) {
                 /*
                  * Since delayed_put_task_struct() also drops the last
@@@ -4240,7 -4340,7 +4239,7 @@@ again
                  * Cannot change, child events are not migrated, see the
                  * comment with perf_event_ctx_lock_nested().
                  */
- -              ctx = lockless_dereference(child->ctx);
+ +              ctx = READ_ONCE(child->ctx);
                 /*
                  * Since child_mutex nests inside ctx::mutex, we must jump
                  * through hoops. We start by grabbing a reference on the ctx.
@@@ -4300,7 -4400,7 +4299,7 @@@ static int perf_release(struct inode *i
         return 0;
   }
   
- -u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
+ +static u64 __perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
   {
         struct perf_event *child;
         u64 total = 0;
@@@ -4328,18 -4428,6 +4327,18 @@@
   
         return total;
   }
+ +
+ +u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
+ +{
+ +      struct perf_event_context *ctx;
+ +      u64 count;
+ +
+ +      ctx = perf_event_ctx_lock(event);
+ +      count = __perf_event_read_value(event, enabled, running);
+ +      perf_event_ctx_unlock(event, ctx);
+ +
+ +      return count;
+ +}
   EXPORT_SYMBOL_GPL(perf_event_read_value);
   
   static int __perf_read_group_add(struct perf_event *leader,
@@@ -4355,8 -4443,6 +4354,8 @@@
         if (ret)
                 return ret;
   
+ +      raw_spin_lock_irqsave(&ctx->lock, flags);
+ +
         /*
          * Since we co-schedule groups, {enabled,running} times of siblings
          * will be identical to those of the leader, so we only publish one
@@@ -4379,6 -4465,8 +4378,6 @@@
         if (read_format & PERF_FORMAT_ID)
                 values[n++] = primary_event_id(leader);
   
- -      raw_spin_lock_irqsave(&ctx->lock, flags);
- -
         list_for_each_entry(sub, &leader->sibling_list, group_entry) {
                 values[n++] += perf_event_count(sub);
                 if (read_format & PERF_FORMAT_ID)
@@@ -4442,7 -4530,7 +4441,7 @@@ static int perf_read_one(struct perf_ev
         u64 values[4];
         int n = 0;
   
- -      values[n++] = perf_event_read_value(event, &enabled, &running);
+ +      values[n++] = __perf_event_read_value(event, &enabled, &running);
         if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
                 values[n++] = enabled;
         if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
@@@ -4821,7 -4909,8 +4820,7 @@@ static void calc_timer_values(struct pe
   
         *now = perf_clock();
         ctx_time = event->shadow_ctx_time + *now;
- -      *enabled = ctx_time - event->tstamp_enabled;
- -      *running = ctx_time - event->tstamp_running;
+ +      __perf_update_times(event, ctx_time, enabled, running);
   }
   
   static void perf_event_init_userpage(struct perf_event *event)
@@@ -5225,8 -5314,8 +5224,8 @@@ static int perf_mmap(struct file *file
                 if (!rb)
                         goto aux_unlock;
   
- -              aux_offset = ACCESS_ONCE(rb->user_page->aux_offset);
- -              aux_size = ACCESS_ONCE(rb->user_page->aux_size);
+ +              aux_offset = READ_ONCE(rb->user_page->aux_offset);
+ +              aux_size = READ_ONCE(rb->user_page->aux_size);
   
                 if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
                         goto aux_unlock;
@@@ -7867,11 -7956,9 +7866,9 @@@ void perf_trace_run_bpf_submit(void *ra
                                struct pt_regs *regs, struct hlist_head *head,
                                struct task_struct *task)
   {
-       struct bpf_prog *prog = call->prog;
- 
-       if (prog) {
+       if (bpf_prog_array_valid(call)) {
                 *(struct pt_regs **)raw_data = regs;
-               if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
+               if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
                         perf_swevent_put_recursion_context(rctx);
                         return;
                 }
@@@ -8060,13 -8147,11 +8057,11 @@@ static int perf_event_set_bpf_prog(stru
   {
         bool is_kprobe, is_tracepoint, is_syscall_tp;
         struct bpf_prog *prog;
+       int ret;
   
         if (event->attr.type != PERF_TYPE_TRACEPOINT)
                 return perf_event_set_bpf_handler(event, prog_fd);
   
-       if (event->tp_event->prog)
-               return -EEXIST;
- 
         is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
         is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
         is_syscall_tp = is_syscall_trace_event(event->tp_event);
@@@ -8094,26 -8179,20 +8089,20 @@@
                         return -EACCES;
                 }
         }
-       event->tp_event->prog = prog;
-       event->tp_event->bpf_prog_owner = event;
   
-       return 0;
+       ret = perf_event_attach_bpf_prog(event, prog);
+       if (ret)
+               bpf_prog_put(prog);
+       return ret;
   }
   
   static void perf_event_free_bpf_prog(struct perf_event *event)
   {
-       struct bpf_prog *prog;
- 
-       perf_event_free_bpf_handler(event);
- 
-       if (!event->tp_event)
+       if (event->attr.type != PERF_TYPE_TRACEPOINT) {
+               perf_event_free_bpf_handler(event);
                 return;
- 
-       prog = event->tp_event->prog;
-       if (prog && event->tp_event->bpf_prog_owner == event) {
-               event->tp_event->prog = NULL;
-               bpf_prog_put(prog);
         }
+       perf_event_detach_bpf_prog(event);
   }
   
   #else
@@@ -9326,11 -9405,6 +9315,11 @@@ static void account_event(struct perf_e
                 inc = true;
   
         if (inc) {
+ +              /*
+ +               * We need the mutex here because static_branch_enable()
+ +               * must complete *before* the perf_sched_count increment
+ +               * becomes visible.
+ +               */
                 if (atomic_inc_not_zero(&perf_sched_count))
                         goto enabled;
   
@@@ -10456,7 -10530,7 +10445,7 @@@ perf_event_exit_event(struct perf_even
         if (parent_event)
                 perf_group_detach(child_event);
         list_del_event(child_event, child_ctx);
- -      child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
+ +      perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
         raw_spin_unlock_irq(&child_ctx->lock);
   
         /*
@@@ -10694,7 -10768,7 +10683,7 @@@ inherit_event(struct perf_event *parent
               struct perf_event *group_leader,
               struct perf_event_context *child_ctx)
   {
- -      enum perf_event_active_state parent_state = parent_event->state;
+ +      enum perf_event_state parent_state = parent_event->state;
         struct perf_event *child_event;
         unsigned long flags;
   
@@@ -11030,7 -11104,6 +11019,7 @@@ static void __perf_event_exit_context(v
         struct perf_event *event;
   
         raw_spin_lock(&ctx->lock);
+ +      ctx_sched_out(ctx, cpuctx, EVENT_TIME);
         list_for_each_entry(event, &ctx->event_list, event_entry)
                 __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
         raw_spin_unlock(&ctx->lock);
diff --combined lib/dynamic_queue_limits.c

index da4672a50a54a2046bb86479c57dc11552a1981c,8dbfdf6445f8aa4f9d452b267ef6a6d00e375906..e659a027036ece3714d3261deb252c9d6008ec83
--- 1/lib/dynamic_queue_limits.c
--- 2/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@@ -21,7 -21,7 +21,7 @@@ void dql_completed(struct dql *dql, uns
         unsigned int ovlimit, completed, num_queued;
         bool all_prev_completed;
   
- -      num_queued = ACCESS_ONCE(dql->num_queued);
+ +      num_queued = READ_ONCE(dql->num_queued);
   
         /* Can't complete more than what's in queue */
         BUG_ON(count > num_queued - dql->num_completed);
@@@ -128,12 -128,11 +128,11 @@@ void dql_reset(struct dql *dql
   }
   EXPORT_SYMBOL(dql_reset);
   
- int dql_init(struct dql *dql, unsigned hold_time)
+ void dql_init(struct dql *dql, unsigned int hold_time)
   {
         dql->max_limit = DQL_MAX_LIMIT;
         dql->min_limit = 0;
         dql->slack_hold_time = hold_time;
         dql_reset(dql);
-       return 0;
   }
   EXPORT_SYMBOL(dql_init);
diff --combined net/atm/mpc.c

index 63138c8c2269cd190b9f8d50cf2d179ee63682f8,883d25778fa47dcadf64b90f451c918ca8bc45fe..e882d8b5db05e889be00fe26a0595458ead470a4
--- 1/net/atm/mpc.c
--- 2/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@@ -95,7 -95,7 +95,7 @@@ static netdev_tx_t mpc_send_packet(stru
   static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
                                unsigned long event, void *dev);
   static void mpc_timer_refresh(void);
- static void mpc_cache_check(unsigned long checking_time);
+ static void mpc_cache_check(struct timer_list *unused);
   
   static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
         0xaa, 0xaa, 0x03,
@@@ -121,7 -121,7 +121,7 @@@ static struct notifier_block mpoa_notif
   
   struct mpoa_client *mpcs = NULL; /* FIXME */
   static struct atm_mpoa_qos *qos_head = NULL;
- -static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
+ +static DEFINE_TIMER(mpc_timer, NULL);
   
   
   static struct mpoa_client *find_mpc_by_itfnum(int itf)
@@@ -799,7 -799,6 +799,6 @@@ static int atm_mpoa_mpoad_attach(struc
         int err;
   
         if (mpcs == NULL) {
-               init_timer(&mpc_timer);
                 mpc_timer_refresh();
   
                 /* This lets us now how our LECs are doing */
@@@ -1408,15 -1407,17 +1407,17 @@@ static void clean_up(struct k_message *
         msg_to_mpoad(msg, mpc);
   }
   
+ static unsigned long checking_time;
+ 
   static void mpc_timer_refresh(void)
   {
         mpc_timer.expires = jiffies + (MPC_P2 * HZ);
-       mpc_timer.data = mpc_timer.expires;
-       mpc_timer.function = mpc_cache_check;
+       checking_time = mpc_timer.expires;
+       mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check;
         add_timer(&mpc_timer);
   }
   
- static void mpc_cache_check(unsigned long checking_time)
+ static void mpc_cache_check(struct timer_list *unused)
   {
         struct mpoa_client *mpc = mpcs;
         static unsigned long previous_resolving_check_time;
diff --combined net/core/dev.c

index 61559ca3980b8d25d5faf6106d52717b16cc4cd3,ad5f90dacd92b9bee1e1f6b209f1d4999954b002..8ee29f4f5fa91894e63734cfee3ee6909fd21b26
--- 1/net/core/dev.c
--- 2/net/core/dev.c
+++ b/net/core/dev.c
@@@ -145,6 -145,7 +145,7 @@@
   #include <linux/crash_dump.h>
   #include <linux/sctp.h>
   #include <net/udp_tunnel.h>
+ #include <linux/net_namespace.h>
   
   #include "net-sysfs.h"
   
@@@ -162,7 -163,6 +163,6 @@@ static struct list_head offload_base __
   
   static int netif_rx_internal(struct sk_buff *skb);
   static int call_netdevice_notifiers_info(unsigned long val,
-                                        struct net_device *dev,
                                          struct netdev_notifier_info *info);
   static struct napi_struct *napi_by_id(unsigned int napi_id);
   
@@@ -188,6 -188,8 +188,8 @@@
   DEFINE_RWLOCK(dev_base_lock);
   EXPORT_SYMBOL(dev_base_lock);
   
+ static DEFINE_MUTEX(ifalias_mutex);
+ 
   /* protects napi_hash addition/deletion and napi_gen_id */
   static DEFINE_SPINLOCK(napi_hash_lock);
   
@@@ -1062,7 -1064,10 +1064,10 @@@ static int __dev_alloc_name(struct net 
         unsigned long *inuse;
         struct net_device *d;
   
-       p = strnchr(name, IFNAMSIZ-1, '%');
+       if (!dev_valid_name(name))
+               return -EINVAL;
+ 
+       p = strchr(name, '%');
         if (p) {
                 /*
                  * Verify the string as this thing may have come from
@@@ -1093,8 -1098,7 +1098,7 @@@
                 free_page((unsigned long) inuse);
         }
   
-       if (buf != name)
-               snprintf(buf, IFNAMSIZ, name, i);
+       snprintf(buf, IFNAMSIZ, name, i);
         if (!__dev_get_by_name(net, buf))
                 return i;
   
@@@ -1102,7 -1106,21 +1106,21 @@@
          * when the name is long and there isn't enough space left
          * for the digits, or if all bits are used.
          */
-       return -ENFILE;
+       return p ? -ENFILE : -EEXIST;
+ }
+ 
+ static int dev_alloc_name_ns(struct net *net,
+                            struct net_device *dev,
+                            const char *name)
+ {
+       char buf[IFNAMSIZ];
+       int ret;
+ 
+       BUG_ON(!net);
+       ret = __dev_alloc_name(net, name, buf);
+       if (ret >= 0)
+               strlcpy(dev->name, buf, IFNAMSIZ);
+       return ret;
   }
   
   /**
@@@ -1121,48 -1139,14 +1139,14 @@@
   
   int dev_alloc_name(struct net_device *dev, const char *name)
   {
-       char buf[IFNAMSIZ];
-       struct net *net;
-       int ret;
- 
-       BUG_ON(!dev_net(dev));
-       net = dev_net(dev);
-       ret = __dev_alloc_name(net, name, buf);
-       if (ret >= 0)
-               strlcpy(dev->name, buf, IFNAMSIZ);
-       return ret;
+       return dev_alloc_name_ns(dev_net(dev), dev, name);
   }
   EXPORT_SYMBOL(dev_alloc_name);
   
- static int dev_alloc_name_ns(struct net *net,
-                            struct net_device *dev,
-                            const char *name)
- {
-       char buf[IFNAMSIZ];
-       int ret;
- 
-       ret = __dev_alloc_name(net, name, buf);
-       if (ret >= 0)
-               strlcpy(dev->name, buf, IFNAMSIZ);
-       return ret;
- }
- 
   int dev_get_valid_name(struct net *net, struct net_device *dev,
                        const char *name)
   {
-       BUG_ON(!net);
- 
-       if (!dev_valid_name(name))
-               return -EINVAL;
- 
-       if (strchr(name, '%'))
-               return dev_alloc_name_ns(net, dev, name);
-       else if (__dev_get_by_name(net, name))
-               return -EEXIST;
-       else if (dev->name != name)
-               strlcpy(dev->name, name, IFNAMSIZ);
- 
-       return 0;
+       return dev_alloc_name_ns(net, dev, name);
   }
   EXPORT_SYMBOL(dev_get_valid_name);
   
@@@ -1265,29 -1249,53 +1249,53 @@@ rollback
    */
   int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
   {
-       char *new_ifalias;
- 
-       ASSERT_RTNL();
+       struct dev_ifalias *new_alias = NULL;
   
         if (len >= IFALIASZ)
                 return -EINVAL;
   
-       if (!len) {
-               kfree(dev->ifalias);
-               dev->ifalias = NULL;
-               return 0;
+       if (len) {
+               new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+               if (!new_alias)
+                       return -ENOMEM;
+ 
+               memcpy(new_alias->ifalias, alias, len);
+               new_alias->ifalias[len] = 0;
         }
   
-       new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
-       if (!new_ifalias)
-               return -ENOMEM;
-       dev->ifalias = new_ifalias;
-       memcpy(dev->ifalias, alias, len);
-       dev->ifalias[len] = 0;
+       mutex_lock(&ifalias_mutex);
+       rcu_swap_protected(dev->ifalias, new_alias,
+                          mutex_is_locked(&ifalias_mutex));
+       mutex_unlock(&ifalias_mutex);
+ 
+       if (new_alias)
+               kfree_rcu(new_alias, rcuhead);
   
         return len;
   }
   
+ /**
+  *    dev_get_alias - get ifalias of a device
+  *    @dev: device
+  *    @name: buffer to store name of ifalias
+  *    @len: size of buffer
+  *
+  *    get ifalias for a device.  Caller must make sure dev cannot go
+  *    away,  e.g. rcu read lock or own a reference count to device.
+  */
+ int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+ {
+       const struct dev_ifalias *alias;
+       int ret = 0;
+ 
+       rcu_read_lock();
+       alias = rcu_dereference(dev->ifalias);
+       if (alias)
+               ret = snprintf(name, len, "%s", alias->ifalias);
+       rcu_read_unlock();
+ 
+       return ret;
+ }
   
   /**
    *    netdev_features_change - device changes features
@@@ -1312,10 -1320,11 +1320,11 @@@ EXPORT_SYMBOL(netdev_features_change)
   void netdev_state_change(struct net_device *dev)
   {
         if (dev->flags & IFF_UP) {
-               struct netdev_notifier_change_info change_info;
+               struct netdev_notifier_change_info change_info = {
+                       .info.dev = dev,
+               };
   
-               change_info.flags_changed = 0;
-               call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+               call_netdevice_notifiers_info(NETDEV_CHANGE,
                                               &change_info.info);
                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
         }
@@@ -1536,9 -1545,10 +1545,10 @@@ EXPORT_SYMBOL(dev_disable_lro)
   static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
                                    struct net_device *dev)
   {
-       struct netdev_notifier_info info;
+       struct netdev_notifier_info info = {
+               .dev = dev,
+       };
   
-       netdev_notifier_info_init(&info, dev);
         return nb->notifier_call(nb, val, &info);
   }
   
@@@ -1663,11 -1673,9 +1673,9 @@@ EXPORT_SYMBOL(unregister_netdevice_noti
    */
   
   static int call_netdevice_notifiers_info(unsigned long val,
-                                        struct net_device *dev,
                                          struct netdev_notifier_info *info)
   {
         ASSERT_RTNL();
-       netdev_notifier_info_init(info, dev);
         return raw_notifier_call_chain(&netdev_chain, val, info);
   }
   
@@@ -1682,9 -1690,11 +1690,11 @@@
   
   int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
   {
-       struct netdev_notifier_info info;
+       struct netdev_notifier_info info = {
+               .dev = dev,
+       };
   
-       return call_netdevice_notifiers_info(val, dev, &info);
+       return call_netdevice_notifiers_info(val, &info);
   }
   EXPORT_SYMBOL(call_netdevice_notifiers);
   
@@@ -2012,6 -2022,7 +2022,7 @@@ int netdev_txq_to_tc(struct net_device 
   
         return 0;
   }
+ EXPORT_SYMBOL(netdev_txq_to_tc);
   
   #ifdef CONFIG_XPS
   static DEFINE_MUTEX(xps_map_mutex);
@@@ -3245,22 -3256,22 +3256,22 @@@ EXPORT_SYMBOL(dev_loopback_xmit)
   static struct sk_buff *
   sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
   {
-       struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+       struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
         struct tcf_result cl_res;
   
-       if (!cl)
+       if (!miniq)
                 return skb;
   
         /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-       qdisc_bstats_cpu_update(cl->q, skb);
+       mini_qdisc_bstats_cpu_update(miniq, skb);
   
-       switch (tcf_classify(skb, cl, &cl_res, false)) {
+       switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
         case TC_ACT_OK:
         case TC_ACT_RECLASSIFY:
                 skb->tc_index = TC_H_MIN(cl_res.classid);
                 break;
         case TC_ACT_SHOT:
-               qdisc_qstats_cpu_drop(cl->q);
+               mini_qdisc_qstats_cpu_drop(miniq);
                 *ret = NET_XMIT_DROP;
                 kfree_skb(skb);
                 return NULL;
@@@ -3725,7 -3736,7 +3736,7 @@@ bool rps_may_expire_flow(struct net_dev
         flow_table = rcu_dereference(rxqueue->rps_flow_table);
         if (flow_table && flow_id <= flow_table->mask) {
                 rflow = &flow_table->flows[flow_id];
- -              cpu = ACCESS_ONCE(rflow->cpu);
+ +              cpu = READ_ONCE(rflow->cpu);
                 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
                     ((int)(per_cpu(softnet_data, cpu).input_queue_head -
                            rflow->last_qtail) <
@@@ -3864,8 -3875,8 +3875,8 @@@ drop
   static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                                      struct bpf_prog *xdp_prog)
   {
+       u32 metalen, act = XDP_DROP;
         struct xdp_buff xdp;
-       u32 act = XDP_DROP;
         void *orig_data;
         int hlen, off;
         u32 mac_len;
@@@ -3876,8 -3887,25 +3887,25 @@@
         if (skb_cloned(skb))
                 return XDP_PASS;
   
-       if (skb_linearize(skb))
-               goto do_drop;
+       /* XDP packets must be linear and must have sufficient headroom
+        * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+        * native XDP provides, thus we need to do it here as well.
+        */
+       if (skb_is_nonlinear(skb) ||
+           skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+               int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+               int troom = skb->tail + skb->data_len - skb->end;
+ 
+               /* In case we have to go down the path and also linearize,
+                * then lets do the pskb_expand_head() work just once here.
+                */
+               if (pskb_expand_head(skb,
+                                    hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+                                    troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+                       goto do_drop;
+               if (troom > 0 && __skb_linearize(skb))
+                       goto do_drop;
+       }
   
         /* The XDP program wants to see the packet starting at the MAC
          * header.
@@@ -3885,6 -3913,7 +3913,7 @@@
         mac_len = skb->data - skb_mac_header(skb);
         hlen = skb_headlen(skb) + mac_len;
         xdp.data = skb->data - mac_len;
+       xdp.data_meta = xdp.data;
         xdp.data_end = xdp.data + hlen;
         xdp.data_hard_start = skb->data - skb_headroom(skb);
         orig_data = xdp.data;
@@@ -3902,10 -3931,12 +3931,12 @@@
         case XDP_REDIRECT:
         case XDP_TX:
                 __skb_push(skb, mac_len);
-               /* fall through */
+               break;
         case XDP_PASS:
+               metalen = xdp.data - xdp.data_meta;
+               if (metalen)
+                       skb_metadata_set(skb, metalen);
                 break;
- 
         default:
                 bpf_warn_invalid_xdp_action(act);
                 /* fall through */
@@@ -4140,7 -4171,7 +4171,7 @@@ sch_handle_ingress(struct sk_buff *skb
                    struct net_device *orig_dev)
   {
   #ifdef CONFIG_NET_CLS_ACT
-       struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+       struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
         struct tcf_result cl_res;
   
         /* If there's at least one ingress present somewhere (so
@@@ -4148,8 -4179,9 +4179,9 @@@
          * that are not configured with an ingress qdisc will bail
          * out here.
          */
-       if (!cl)
+       if (!miniq)
                 return skb;
+ 
         if (*pt_prev) {
                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
                 *pt_prev = NULL;
@@@ -4157,15 -4189,15 +4189,15 @@@
   
         qdisc_skb_cb(skb)->pkt_len = skb->len;
         skb->tc_at_ingress = 1;
-       qdisc_bstats_cpu_update(cl->q, skb);
+       mini_qdisc_bstats_cpu_update(miniq, skb);
   
-       switch (tcf_classify(skb, cl, &cl_res, false)) {
+       switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
         case TC_ACT_OK:
         case TC_ACT_RECLASSIFY:
                 skb->tc_index = TC_H_MIN(cl_res.classid);
                 break;
         case TC_ACT_SHOT:
-               qdisc_qstats_cpu_drop(cl->q);
+               mini_qdisc_qstats_cpu_drop(miniq);
                 kfree_skb(skb);
                 return NULL;
         case TC_ACT_STOLEN:
@@@ -4443,6 -4475,33 +4475,33 @@@ out
         return ret;
   }
   
+ /**
+  *    netif_receive_skb_core - special purpose version of netif_receive_skb
+  *    @skb: buffer to process
+  *
+  *    More direct receive version of netif_receive_skb().  It should
+  *    only be used by callers that have a need to skip RPS and Generic XDP.
+  *    Caller must also take care of handling if (page_is_)pfmemalloc.
+  *
+  *    This function may only be called from softirq context and interrupts
+  *    should be enabled.
+  *
+  *    Return values (usually ignored):
+  *    NET_RX_SUCCESS: no congestion
+  *    NET_RX_DROP: packet was dropped
+  */
+ int netif_receive_skb_core(struct sk_buff *skb)
+ {
+       int ret;
+ 
+       rcu_read_lock();
+       ret = __netif_receive_skb_core(skb, false);
+       rcu_read_unlock();
+ 
+       return ret;
+ }
+ EXPORT_SYMBOL(netif_receive_skb_core);
+ 
   static int __netif_receive_skb(struct sk_buff *skb)
   {
         int ret;
@@@ -4468,7 -4527,7 +4527,7 @@@
         return ret;
   }
   
- static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
   {
         struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
         struct bpf_prog *new = xdp->prog;
@@@ -4695,6 -4754,7 +4754,7 @@@ static void gro_list_prepare(struct nap
                 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                 diffs |= p->vlan_tci ^ skb->vlan_tci;
                 diffs |= skb_metadata_dst_cmp(p, skb);
+               diffs |= skb_metadata_differs(p, skb);
                 if (maclen == ETH_HLEN)
                         diffs |= compare_ether_header(skb_mac_header(p),
                                                       skb_mac_header(skb));
@@@ -6228,9 -6288,19 +6288,19 @@@ static void __netdev_adjacent_dev_unlin
   
   static int __netdev_upper_dev_link(struct net_device *dev,
                                    struct net_device *upper_dev, bool master,
-                                  void *upper_priv, void *upper_info)
- {
-       struct netdev_notifier_changeupper_info changeupper_info;
+                                  void *upper_priv, void *upper_info,
+                                  struct netlink_ext_ack *extack)
+ {
+       struct netdev_notifier_changeupper_info changeupper_info = {
+               .info = {
+                       .dev = dev,
+                       .extack = extack,
+               },
+               .upper_dev = upper_dev,
+               .master = master,
+               .linking = true,
+               .upper_info = upper_info,
+       };
         int ret = 0;
   
         ASSERT_RTNL();
@@@ -6248,12 -6318,7 +6318,7 @@@
         if (master && netdev_master_upper_dev_get(dev))
                 return -EBUSY;
   
-       changeupper_info.upper_dev = upper_dev;
-       changeupper_info.master = master;
-       changeupper_info.linking = true;
-       changeupper_info.upper_info = upper_info;
- 
-       ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+       ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
                                             &changeupper_info.info);
         ret = notifier_to_errno(ret);
         if (ret)
@@@ -6264,7 -6329,7 +6329,7 @@@
         if (ret)
                 return ret;
   
-       ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+       ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
                                             &changeupper_info.info);
         ret = notifier_to_errno(ret);
         if (ret)
@@@ -6289,9 -6354,11 +6354,11 @@@ rollback
    * returns zero.
    */
   int netdev_upper_dev_link(struct net_device *dev,
-                         struct net_device *upper_dev)
+                         struct net_device *upper_dev,
+                         struct netlink_ext_ack *extack)
   {
-       return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+       return __netdev_upper_dev_link(dev, upper_dev, false,
+                                      NULL, NULL, extack);
   }
   EXPORT_SYMBOL(netdev_upper_dev_link);
   
@@@ -6310,10 -6377,11 +6377,11 @@@
    */
   int netdev_master_upper_dev_link(struct net_device *dev,
                                  struct net_device *upper_dev,
-                                void *upper_priv, void *upper_info)
+                                void *upper_priv, void *upper_info,
+                                struct netlink_ext_ack *extack)
   {
         return __netdev_upper_dev_link(dev, upper_dev, true,
-                                      upper_priv, upper_info);
+                                      upper_priv, upper_info, extack);
   }
   EXPORT_SYMBOL(netdev_master_upper_dev_link);
   
@@@ -6328,20 -6396,24 +6396,24 @@@
   void netdev_upper_dev_unlink(struct net_device *dev,
                              struct net_device *upper_dev)
   {
-       struct netdev_notifier_changeupper_info changeupper_info;
+       struct netdev_notifier_changeupper_info changeupper_info = {
+               .info = {
+                       .dev = dev,
+               },
+               .upper_dev = upper_dev,
+               .linking = false,
+       };
   
         ASSERT_RTNL();
   
-       changeupper_info.upper_dev = upper_dev;
         changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
-       changeupper_info.linking = false;
   
-       call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+       call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
                                       &changeupper_info.info);
   
         __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
   
-       call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+       call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
                                       &changeupper_info.info);
   }
   EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@@ -6357,11 -6429,13 +6429,13 @@@
   void netdev_bonding_info_change(struct net_device *dev,
                                 struct netdev_bonding_info *bonding_info)
   {
-       struct netdev_notifier_bonding_info     info;
+       struct netdev_notifier_bonding_info info = {
+               .info.dev = dev,
+       };
   
         memcpy(&info.bonding_info, bonding_info,
                sizeof(struct netdev_bonding_info));
-       call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+       call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
                                       &info.info);
   }
   EXPORT_SYMBOL(netdev_bonding_info_change);
@@@ -6487,11 -6561,13 +6561,13 @@@ EXPORT_SYMBOL(dev_get_nest_level)
   void netdev_lower_state_changed(struct net_device *lower_dev,
                                 void *lower_state_info)
   {
-       struct netdev_notifier_changelowerstate_info changelowerstate_info;
+       struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+               .info.dev = lower_dev,
+       };
   
         ASSERT_RTNL();
         changelowerstate_info.lower_state_info = lower_state_info;
-       call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+       call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
                                       &changelowerstate_info.info);
   }
   EXPORT_SYMBOL(netdev_lower_state_changed);
@@@ -6782,11 -6858,14 +6858,14 @@@ void __dev_notify_flags(struct net_devi
   
         if (dev->flags & IFF_UP &&
             (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
-               struct netdev_notifier_change_info change_info;
+               struct netdev_notifier_change_info change_info = {
+                       .info = {
+                               .dev = dev,
+                       },
+                       .flags_changed = changes,
+               };
   
-               change_info.flags_changed = changes;
-               call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
-                                             &change_info.info);
+               call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
         }
   }
   
@@@ -6993,26 -7072,26 +7072,26 @@@ int dev_change_proto_down(struct net_de
   }
   EXPORT_SYMBOL(dev_change_proto_down);
   
- u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+ u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
   {
-       struct netdev_xdp xdp;
+       struct netdev_bpf xdp;
   
         memset(&xdp, 0, sizeof(xdp));
         xdp.command = XDP_QUERY_PROG;
   
         /* Query must always succeed. */
-       WARN_ON(xdp_op(dev, &xdp) < 0);
+       WARN_ON(bpf_op(dev, &xdp) < 0);
         if (prog_id)
                 *prog_id = xdp.prog_id;
   
         return xdp.prog_attached;
   }
   
- static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
                            struct netlink_ext_ack *extack, u32 flags,
                            struct bpf_prog *prog)
   {
-       struct netdev_xdp xdp;
+       struct netdev_bpf xdp;
   
         memset(&xdp, 0, sizeof(xdp));
         if (flags & XDP_FLAGS_HW_MODE)
@@@ -7023,7 -7102,7 +7102,7 @@@
         xdp.flags = flags;
         xdp.prog = prog;
   
-       return xdp_op(dev, &xdp);
+       return bpf_op(dev, &xdp);
   }
   
   /**
@@@ -7040,32 -7119,36 +7119,36 @@@ int dev_change_xdp_fd(struct net_devic
   {
         const struct net_device_ops *ops = dev->netdev_ops;
         struct bpf_prog *prog = NULL;
-       xdp_op_t xdp_op, xdp_chk;
+       bpf_op_t bpf_op, bpf_chk;
         int err;
   
         ASSERT_RTNL();
   
-       xdp_op = xdp_chk = ops->ndo_xdp;
-       if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+       bpf_op = bpf_chk = ops->ndo_bpf;
+       if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
                 return -EOPNOTSUPP;
-       if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
-               xdp_op = generic_xdp_install;
-       if (xdp_op == xdp_chk)
-               xdp_chk = generic_xdp_install;
+       if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
+               bpf_op = generic_xdp_install;
+       if (bpf_op == bpf_chk)
+               bpf_chk = generic_xdp_install;
   
         if (fd >= 0) {
-               if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+               if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
                         return -EEXIST;
                 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-                   __dev_xdp_attached(dev, xdp_op, NULL))
+                   __dev_xdp_attached(dev, bpf_op, NULL))
                         return -EBUSY;
   
-               prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+               if (bpf_op == ops->ndo_bpf)
+                       prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+                                                    dev);
+               else
+                       prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
                 if (IS_ERR(prog))
                         return PTR_ERR(prog);
         }
   
-       err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
+       err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
         if (err < 0 && prog)
                 bpf_prog_put(prog);
   
@@@ -7157,7 -7240,7 +7240,7 @@@ static void rollback_registered_many(st
                 if (!dev->rtnl_link_ops ||
                     dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
                         skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
-                                                    GFP_KERNEL);
+                                                    GFP_KERNEL, NULL);
   
                 /*
                  *      Flush the unicast and multicast chains
@@@ -7994,7 -8077,7 +8077,7 @@@ struct net_device *alloc_netdev_mqs(in
                 unsigned int txqs, unsigned int rxqs)
   {
         struct net_device *dev;
-       size_t alloc_size;
+       unsigned int alloc_size;
         struct net_device *p;
   
         BUG_ON(strlen(name) >= sizeof(dev->name));
@@@ -8244,7 -8327,7 +8327,7 @@@ EXPORT_SYMBOL(unregister_netdev)
   
   int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
   {
-       int err;
+       int err, new_nsid;
   
         ASSERT_RTNL();
   
@@@ -8300,7 -8383,11 +8383,11 @@@
         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
         rcu_barrier();
         call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-       rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+       if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+               new_nsid = peernet2id_alloc(dev_net(dev), net);
+       else
+               new_nsid = peernet2id(dev_net(dev), net);
+       rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
   
         /*
          *      Flush the unicast and multicast chains
@@@ -8562,6 -8649,8 +8649,8 @@@ static void __net_exit netdev_exit(stru
   {
         kfree(net->dev_name_head);
         kfree(net->dev_index_head);
+       if (net != &init_net)
+               WARN_ON_ONCE(!list_empty(&net->dev_base_head));
   }
   
   static struct pernet_operations __net_initdata netdev_net_ops = {
diff --combined net/core/pktgen.c

index 3b2034f6d49d20a0df890d02ea30ebd05cdb87b4,40db0b7e37ac9a9dd358236aeecc56963813fbe3..f95a150862250be5704c31e443928542d18d848f
--- 1/net/core/pktgen.c
--- 2/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@@ -2165,7 -2165,7 +2165,7 @@@ static void pktgen_setup_inject(struct 
                                                 + pkt_dev->pkt_overhead;
                 }
   
-               for (i = 0; i < IN6_ADDR_HSIZE; i++)
+               for (i = 0; i < sizeof(struct in6_addr); i++)
                         if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
                                 set = 1;
                                 break;
@@@ -2711,7 -2711,7 +2711,7 @@@ static inline __be16 build_tci(unsigne
   static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
                                 int datalen)
   {
-       struct timeval timestamp;
+       struct timespec64 timestamp;
         struct pktgen_hdr *pgh;
   
         pgh = skb_put(skb, sizeof(*pgh));
@@@ -2773,9 -2773,17 +2773,17 @@@
                 pgh->tv_sec = 0;
                 pgh->tv_usec = 0;
         } else {
-               do_gettimeofday(&timestamp);
+               /*
+                * pgh->tv_sec wraps in y2106 when interpreted as unsigned
+                * as done by wireshark, or y2038 when interpreted as signed.
+                * This is probably harmless, but if anyone wants to improve
+                * it, we could introduce a variant that puts 64-bit nanoseconds
+                * into the respective header bytes.
+                * This would also be slightly faster to read.
+                */
+               ktime_get_real_ts64(&timestamp);
                 pgh->tv_sec = htonl(timestamp.tv_sec);
-               pgh->tv_usec = htonl(timestamp.tv_usec);
+               pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
         }
   }
   
@@@ -3377,7 -3385,7 +3385,7 @@@ static void pktgen_wait_for_skb(struct 
   
   static void pktgen_xmit(struct pktgen_dev *pkt_dev)
   {
- -      unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
+ +      unsigned int burst = READ_ONCE(pkt_dev->burst);
         struct net_device *odev = pkt_dev->odev;
         struct netdev_queue *txq;
         struct sk_buff *skb;
diff --combined net/decnet/dn_route.c

index 6538632fbd0342d4fe22aca8dcd66401a150c7a4,bff5ab88cdbb4e0496223271fcd2798c3edc8395..b36dceab0dc12000a73e6fec63e28ffa98691f59
--- 1/net/decnet/dn_route.c
--- 2/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@@ -131,7 -131,7 +131,7 @@@ static struct dn_rt_hash_bucket *dn_rt_
   static unsigned int dn_rt_hash_mask;
   
   static struct timer_list dn_route_timer;
- -static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0);
+ +static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
   int decnet_dst_gc_interval = 2;
   
   static struct dst_ops dn_dst_ops = {
@@@ -338,7 -338,7 +338,7 @@@ static int dn_insert_route(struct dn_ro
                                            dn_rt_hash_table[hash].chain);
                         rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
   
-                       dst_use(&rth->dst, now);
+                       dst_hold_and_use(&rth->dst, now);
                         spin_unlock_bh(&dn_rt_hash_table[hash].lock);
   
                         dst_release_immediate(&rt->dst);
@@@ -351,7 -351,7 +351,7 @@@
         rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
         rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
   
-       dst_use(&rt->dst, now);
+       dst_hold_and_use(&rt->dst, now);
         spin_unlock_bh(&dn_rt_hash_table[hash].lock);
         *rp = rt;
         return 0;
@@@ -1258,7 -1258,7 +1258,7 @@@ static int __dn_route_output_key(struc
                             (flp->flowidn_mark == rt->fld.flowidn_mark) &&
                             dn_is_output_route(rt) &&
                             (rt->fld.flowidn_oif == flp->flowidn_oif)) {
-                               dst_use(&rt->dst, jiffies);
+                               dst_hold_and_use(&rt->dst, jiffies);
                                 rcu_read_unlock_bh();
                                 *pprt = &rt->dst;
                                 return 0;
@@@ -1535,7 -1535,7 +1535,7 @@@ static int dn_route_input(struct sk_buf
                     (rt->fld.flowidn_oif == 0) &&
                     (rt->fld.flowidn_mark == skb->mark) &&
                     (rt->fld.flowidn_iif == cb->iif)) {
-                       dst_use(&rt->dst, jiffies);
+                       dst_hold_and_use(&rt->dst, jiffies);
                         rcu_read_unlock();
                         skb_dst_set(skb, (struct dst_entry *)rt);
                         return 0;
diff --combined net/ipv4/inet_fragment.c

index f9597ba2659986408b3d43c4821e0b7793fa6670,7f3ef5c287a10d107577377db2718b676949b021..26a3d0315728ed2b16ca46080a3546668100bc8e
--- 1/net/ipv4/inet_fragment.c
--- 2/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@@ -147,7 -147,7 +147,7 @@@ inet_evict_bucket(struct inet_frags *f
         spin_unlock(&hb->chain_lock);
   
         hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
-               f->frag_expire((unsigned long) fq);
+               f->frag_expire(&fq->timer);
   
         return evicted;
   }
@@@ -164,7 -164,7 +164,7 @@@ static void inet_frag_worker(struct wor
   
         local_bh_disable();
   
- -      for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+ +      for (i = READ_ONCE(f->next_bucket); budget; --budget) {
                 evicted += inet_evict_bucket(f, &f->hash[i]);
                 i = (i + 1) & (INETFRAGS_HASHSZ - 1);
                 if (evicted > INETFRAGS_EVICT_MAX)
@@@ -366,7 -366,7 +366,7 @@@ static struct inet_frag_queue *inet_fra
         f->constructor(q, arg);
         add_frag_mem_limit(nf, f->qsize);
   
-       setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+       timer_setup(&q->timer, f->frag_expire, 0);
         spin_lock_init(&q->lock);
         refcount_set(&q->refcnt, 1);
   
diff --combined net/ipv4/route.c

index c0864562083b58e8a9143e051eb62a8a4e723d4a,bc40bd4111969f640603dd7c5b04fdb7fdcd4afd..3b427757b1f8ecfee63c0f0667dfa0c38c1653ae
--- 1/net/ipv4/route.c
--- 2/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@@ -495,7 -495,7 +495,7 @@@ u32 ip_idents_reserve(u32 hash, int seg
   {
         u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
         atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
- -      u32 old = ACCESS_ONCE(*p_tstamp);
+ +      u32 old = READ_ONCE(*p_tstamp);
         u32 now = (u32)jiffies;
         u32 new, delta = 0;
   
@@@ -1250,7 -1250,7 +1250,7 @@@ static void set_class_tag(struct rtabl
   static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
   {
         unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
-       unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+       unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
                                     ip_rt_min_advmss);
   
         return min(advmss, IPV4_MAX_PMTU - header_size);
@@@ -3038,7 -3038,6 +3038,6 @@@ struct ip_rt_acct __percpu *ip_rt_acct 
   
   int __init ip_rt_init(void)
   {
-       int rc = 0;
         int cpu;
   
         ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
@@@ -3095,7 -3094,7 +3094,7 @@@
   #endif
         register_pernet_subsys(&rt_genid_ops);
         register_pernet_subsys(&ipv4_inetpeer_ops);
-       return rc;
+       return 0;
   }
   
   #ifdef CONFIG_SYSCTL
diff --combined net/ipv4/tcp_input.c

index 887585045b271af66600f1814ac9d3a601f38773,f0b572fe959ae5b7c47989bd724859d0794de31b..dabbf1d392fb98c4ec3ef42cc814383dde9304aa
--- 1/net/ipv4/tcp_input.c
--- 2/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@@ -76,25 -76,10 +76,10 @@@
   #include <linux/ipsec.h>
   #include <asm/unaligned.h>
   #include <linux/errqueue.h>
+ #include <trace/events/tcp.h>
+ #include <linux/static_key.h>
   
- int sysctl_tcp_fack __read_mostly;
- int sysctl_tcp_max_reordering __read_mostly = 300;
- int sysctl_tcp_dsack __read_mostly = 1;
- int sysctl_tcp_app_win __read_mostly = 31;
- int sysctl_tcp_adv_win_scale __read_mostly = 1;
- EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
- 
- /* rfc5961 challenge ack rate limiting */
- int sysctl_tcp_challenge_ack_limit = 1000;
- 
- int sysctl_tcp_stdurg __read_mostly;
- int sysctl_tcp_rfc1337 __read_mostly;
   int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
- int sysctl_tcp_frto __read_mostly = 2;
- int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
- int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
- int sysctl_tcp_early_retrans __read_mostly = 3;
- int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
   
   #define FLAG_DATA             0x01 /* Incoming frame contained data.          */
   #define FLAG_WIN_UPDATE               0x02 /* Incoming ACK was a window update.       */
@@@ -335,7 -320,7 +320,7 @@@ static void tcp_sndbuf_expand(struct so
         sndmem *= nr_segs * per_mss;
   
         if (sk->sk_sndbuf < sndmem)
-               sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+               sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
   }
   
   /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@@ -368,8 -353,8 +353,8 @@@ static int __tcp_grow_window(const stru
   {
         struct tcp_sock *tp = tcp_sk(sk);
         /* Optimize this! */
-       int truesize = tcp_win_from_space(skb->truesize) >> 1;
-       int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+       int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+       int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
   
         while (tp->rcv_ssthresh <= window) {
                 if (truesize <= skb->len)
@@@ -394,7 -379,7 +379,7 @@@ static void tcp_grow_window(struct soc
                 /* Check #2. Increase window, if skb with such overhead
                  * will fit to rcvbuf in future.
                  */
-               if (tcp_win_from_space(skb->truesize) <= skb->len)
+               if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
                         incr = 2 * tp->advmss;
                 else
                         incr = __tcp_grow_window(sk, skb);
@@@ -420,11 -405,11 +405,11 @@@ static void tcp_fixup_rcvbuf(struct soc
         /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
          * Allow enough cushion so that sender is not limited by our window
          */
-       if (sysctl_tcp_moderate_rcvbuf)
+       if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
                 rcvmem <<= 2;
   
         if (sk->sk_rcvbuf < rcvmem)
-               sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+               sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
   }
   
   /* 4. Try to fixup all. It is made immediately after connection enters
@@@ -432,6 -417,7 +417,7 @@@
    */
   void tcp_init_buffer_space(struct sock *sk)
   {
+       int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
         struct tcp_sock *tp = tcp_sk(sk);
         int maxwin;
   
@@@ -450,14 -436,14 +436,14 @@@
         if (tp->window_clamp >= maxwin) {
                 tp->window_clamp = maxwin;
   
-               if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+               if (tcp_app_win && maxwin > 4 * tp->advmss)
                         tp->window_clamp = max(maxwin -
-                                              (maxwin >> sysctl_tcp_app_win),
+                                              (maxwin >> tcp_app_win),
                                                4 * tp->advmss);
         }
   
         /* Force reservation of one segment. */
-       if (sysctl_tcp_app_win &&
+       if (tcp_app_win &&
             tp->window_clamp > 2 * tp->advmss &&
             tp->window_clamp + tp->advmss > maxwin)
                 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
@@@ -471,15 -457,16 +457,16 @@@ static void tcp_clamp_window(struct soc
   {
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
   
         icsk->icsk_ack.quick = 0;
   
-       if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+       if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
             !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
             !tcp_under_memory_pressure(sk) &&
             sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
                 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-                                   sysctl_tcp_rmem[2]);
+                                   net->ipv4.sysctl_tcp_rmem[2]);
         }
         if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
                 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@@ -610,7 -597,7 +597,7 @@@ void tcp_rcv_space_adjust(struct sock *
          * <prev RTT . ><current RTT .. ><next RTT .... >
          */
   
-       if (sysctl_tcp_moderate_rcvbuf &&
+       if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
             !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
                 int rcvwin, rcvmem, rcvbuf;
   
@@@ -634,10 -621,11 +621,11 @@@
                 }
   
                 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
-               while (tcp_win_from_space(rcvmem) < tp->advmss)
+               while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
                         rcvmem += 128;
   
-               rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+               rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+                            sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
                 if (rcvbuf > sk->sk_rcvbuf) {
                         sk->sk_rcvbuf = rcvbuf;
   
@@@ -781,15 -769,6 +769,6 @@@ static void tcp_rtt_estimator(struct so
         tp->srtt_us = max(1U, srtt);
   }
   
- /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
-  * Note: TCP stack does not yet implement pacing.
-  * FQ packet scheduler can be used to implement cheap but effective
-  * TCP pacing, to smooth the burst on large writes when packets
-  * in flight is significantly lower than cwnd (or rwin)
-  */
- int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
- int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
- 
   static void tcp_update_pacing_rate(struct sock *sk)
   {
         const struct tcp_sock *tp = tcp_sk(sk);
@@@ -807,21 -786,21 +786,21 @@@
          *       end of slow start and should slow down.
          */
         if (tp->snd_cwnd < tp->snd_ssthresh / 2)
-               rate *= sysctl_tcp_pacing_ss_ratio;
+               rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
         else
-               rate *= sysctl_tcp_pacing_ca_ratio;
+               rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
   
         rate *= max(tp->snd_cwnd, tp->packets_out);
   
         if (likely(tp->srtt_us))
                 do_div(rate, tp->srtt_us);
   
- -      /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+ +      /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
          * without any lock. We want to make sure compiler wont store
          * intermediate values in this location.
          */
- -      ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
- -                                              sk->sk_max_pacing_rate);
+ +      WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
+ +                                           sk->sk_max_pacing_rate));
   }
   
   /* Calculate rto without backoff.  This is the second half of Van Jacobson's
@@@ -863,60 -842,46 +842,46 @@@ __u32 tcp_init_cwnd(const struct tcp_so
         return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
   }
   
- /*
-  * Packet counting of FACK is based on in-order assumptions, therefore TCP
-  * disables it when reordering is detected
-  */
- void tcp_disable_fack(struct tcp_sock *tp)
- {
-       /* RFC3517 uses different metric in lost marker => reset on change */
-       if (tcp_is_fack(tp))
-               tp->lost_skb_hint = NULL;
-       tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
- }
- 
   /* Take a notice that peer is sending D-SACKs */
   static void tcp_dsack_seen(struct tcp_sock *tp)
   {
         tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+       tp->rack.dsack_seen = 1;
   }
   
- static void tcp_update_reordering(struct sock *sk, const int metric,
-                                 const int ts)
+ /* It's reordering when higher sequence was delivered (i.e. sacked) before
+  * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+  * distance is approximated in full-mss packet distance ("reordering").
+  */
+ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+                                     const int ts)
   {
         struct tcp_sock *tp = tcp_sk(sk);
-       int mib_idx;
+       const u32 mss = tp->mss_cache;
+       u32 fack, metric;
   
-       if (WARN_ON_ONCE(metric < 0))
+       fack = tcp_highest_sack_seq(tp);
+       if (!before(low_seq, fack))
                 return;
   
-       if (metric > tp->reordering) {
-               tp->reordering = min(sysctl_tcp_max_reordering, metric);
- 
+       metric = fack - low_seq;
+       if ((metric > tp->reordering * mss) && mss) {
   #if FASTRETRANS_DEBUG > 1
                 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
                          tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
                          tp->reordering,
-                        tp->fackets_out,
+                        0,
                          tp->sacked_out,
                          tp->undo_marker ? tp->undo_retrans : 0);
   #endif
-               tcp_disable_fack(tp);
+               tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+                                      sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
         }
   
         tp->rack.reord = 1;
- 
         /* This exciting event is worth to be remembered. 8) */
-       if (ts)
-               mib_idx = LINUX_MIB_TCPTSREORDER;
-       else if (tcp_is_reno(tp))
-               mib_idx = LINUX_MIB_TCPRENOREORDER;
-       else if (tcp_is_fack(tp))
-               mib_idx = LINUX_MIB_TCPFACKREORDER;
-       else
-               mib_idx = LINUX_MIB_TCPSACKREORDER;
- 
-       NET_INC_STATS(sock_net(sk), mib_idx);
+       NET_INC_STATS(sock_net(sk),
+                     ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
   }
   
   /* This must be called before lost_out is incremented */
@@@ -990,7 -955,6 +955,6 @@@ void tcp_skb_mark_lost_uncond_verify(st
    * 3. Loss detection event of two flavors:
    *    A. Scoreboard estimator decided the packet is lost.
    *       A'. Reno "three dupacks" marks head of queue lost.
-  *       A''. Its FACK modification, head until snd.fack is lost.
    *    B. SACK arrives sacking SND.NXT at the moment, when the
    *       segment was retransmitted.
    * 4. D-SACK added new rule: D-SACK changes any tag to S.
@@@ -1133,8 -1097,7 +1097,7 @@@ static bool tcp_check_dsack(struct soc
   }
   
   struct tcp_sacktag_state {
-       int     reord;
-       int     fack_count;
+       u32     reord;
         /* Timestamps for earliest and latest never-retransmitted segment
          * that was SACKed. RTO needs the earliest RTT to stay conservative,
          * but congestion control should still get an accurate delay signal.
@@@ -1143,6 -1106,7 +1106,7 @@@
         u64     last_sackt;
         struct rate_sample *rate;
         int     flag;
+       unsigned int mss_now;
   };
   
   /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@@ -1192,7 -1156,8 +1156,8 @@@ static int tcp_match_skb_to_sack(struc
                 if (pkt_len >= skb->len && !in_sack)
                         return 0;
   
-               err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+               err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+                                  pkt_len, mss, GFP_ATOMIC);
                 if (err < 0)
                         return err;
         }
@@@ -1208,15 -1173,15 +1173,15 @@@ static u8 tcp_sacktag_one(struct sock *
                           u64 xmit_time)
   {
         struct tcp_sock *tp = tcp_sk(sk);
-       int fack_count = state->fack_count;
   
         /* Account D-SACK for retransmitted packet. */
         if (dup_sack && (sacked & TCPCB_RETRANS)) {
                 if (tp->undo_marker && tp->undo_retrans > 0 &&
                     after(end_seq, tp->undo_marker))
                         tp->undo_retrans--;
-               if (sacked & TCPCB_SACKED_ACKED)
-                       state->reord = min(fack_count, state->reord);
+               if ((sacked & TCPCB_SACKED_ACKED) &&
+                   before(start_seq, state->reord))
+                               state->reord = start_seq;
         }
   
         /* Nothing to do; acked frame is about to be dropped (was ACKed). */
@@@ -1242,9 -1207,10 +1207,10 @@@
                                  * which was in hole. It is reordering.
                                  */
                                 if (before(start_seq,
-                                          tcp_highest_sack_seq(tp)))
-                                       state->reord = min(fack_count,
-                                                          state->reord);
+                                          tcp_highest_sack_seq(tp)) &&
+                                   before(start_seq, state->reord))
+                                       state->reord = start_seq;
+ 
                                 if (!after(end_seq, tp->high_seq))
                                         state->flag |= FLAG_ORIG_SACK_ACKED;
                                 if (state->first_sackt == 0)
@@@ -1263,15 -1229,10 +1229,10 @@@
                 tp->sacked_out += pcount;
                 tp->delivered += pcount;  /* Out-of-order packets delivered */
   
-               fack_count += pcount;
- 
                 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
-               if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+               if (tp->lost_skb_hint &&
                     before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
                         tp->lost_cnt_hint += pcount;
- 
-               if (fack_count > tp->fackets_out)
-                       tp->fackets_out = fack_count;
         }
   
         /* D-SACK. We can detect redundant retransmission in S|R and plain R
@@@ -1289,13 -1250,13 +1250,13 @@@
   /* Shift newly-SACKed bytes from this skb to the immediately previous
    * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
    */
- static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+                           struct sk_buff *skb,
                             struct tcp_sacktag_state *state,
                             unsigned int pcount, int shifted, int mss,
                             bool dup_sack)
   {
         struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
         u32 start_seq = TCP_SKB_CB(skb)->seq;   /* start of newly-SACKed */
         u32 end_seq = start_seq + shifted;      /* end of newly-SACKed */
   
@@@ -1364,8 -1325,7 +1325,7 @@@
         if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
                 TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
   
-       tcp_unlink_write_queue(skb, sk);
-       sk_wmem_free_skb(sk, skb);
+       tcp_rtx_queue_unlink_and_free(skb, sk);
   
         NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
   
@@@ -1415,9 -1375,9 +1375,9 @@@ static struct sk_buff *tcp_shift_skb_da
                 goto fallback;
   
         /* Can only happen with delayed DSACK + discard craziness */
-       if (unlikely(skb == tcp_write_queue_head(sk)))
+       prev = skb_rb_prev(skb);
+       if (!prev)
                 goto fallback;
-       prev = tcp_write_queue_prev(sk, skb);
   
         if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
                 goto fallback;
@@@ -1496,18 -1456,17 +1456,17 @@@
   
         if (!skb_shift(prev, skb, len))
                 goto fallback;
-       if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+       if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
                 goto out;
   
         /* Hole filled allows collapsing with the next as well, this is very
          * useful when hole on every nth skb pattern happens
          */
-       if (prev == tcp_write_queue_tail(sk))
+       skb = skb_rb_next(prev);
+       if (!skb)
                 goto out;
-       skb = tcp_write_queue_next(sk, prev);
   
         if (!skb_can_shift(skb) ||
-           (skb == tcp_send_head(sk)) ||
             ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
             (mss != tcp_skb_seglen(skb)))
                 goto out;
@@@ -1515,11 -1474,11 +1474,11 @@@
         len = skb->len;
         if (skb_shift(prev, skb, len)) {
                 pcount += tcp_skb_pcount(skb);
-               tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+               tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+                               len, mss, 0);
         }
   
   out:
-       state->fack_count += pcount;
         return prev;
   
   noop:
@@@ -1539,13 -1498,10 +1498,10 @@@ static struct sk_buff *tcp_sacktag_walk
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *tmp;
   
-       tcp_for_write_queue_from(skb, sk) {
+       skb_rbtree_walk_from(skb) {
                 int in_sack = 0;
                 bool dup_sack = dup_sack_in;
   
-               if (skb == tcp_send_head(sk))
-                       break;
- 
                 /* queue is in-order => we can short-circuit the walk early */
                 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
                         break;
@@@ -1594,34 -1550,48 +1550,48 @@@
                                                 tcp_skb_pcount(skb),
                                                 skb->skb_mstamp);
                         tcp_rate_skb_delivered(sk, skb, state->rate);
+                       if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+                               list_del_init(&skb->tcp_tsorted_anchor);
   
                         if (!before(TCP_SKB_CB(skb)->seq,
                                     tcp_highest_sack_seq(tp)))
                                 tcp_advance_highest_sack(sk, skb);
                 }
- 
-               state->fack_count += tcp_skb_pcount(skb);
         }
         return skb;
   }
   
- /* Avoid all extra work that is being done by sacktag while walking in
-  * a normal way
-  */
+ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+                                          struct tcp_sacktag_state *state,
+                                          u32 seq)
+ {
+       struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+       struct sk_buff *skb;
+ 
+       while (*p) {
+               parent = *p;
+               skb = rb_to_skb(parent);
+               if (before(seq, TCP_SKB_CB(skb)->seq)) {
+                       p = &parent->rb_left;
+                       continue;
+               }
+               if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+                       p = &parent->rb_right;
+                       continue;
+               }
+               return skb;
+       }
+       return NULL;
+ }
+ 
   static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
                                         struct tcp_sacktag_state *state,
                                         u32 skip_to_seq)
   {
-       tcp_for_write_queue_from(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
- 
-               if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
-                       break;
+       if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+               return skb;
   
-               state->fack_count += tcp_skb_pcount(skb);
-       }
-       return skb;
+       return tcp_sacktag_bsearch(sk, state, skip_to_seq);
   }
   
   static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@@ -1666,13 -1636,10 +1636,10 @@@ tcp_sacktag_write_queue(struct sock *sk
         int first_sack_index;
   
         state->flag = 0;
-       state->reord = tp->packets_out;
+       state->reord = tp->snd_nxt;
   
-       if (!tp->sacked_out) {
-               if (WARN_ON(tp->fackets_out))
-                       tp->fackets_out = 0;
+       if (!tp->sacked_out)
                 tcp_highest_sack_reset(sk);
-       }
   
         found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
                                          num_sacks, prior_snd_una);
@@@ -1743,8 -1710,8 +1710,8 @@@
                 }
         }
   
-       skb = tcp_write_queue_head(sk);
-       state->fack_count = 0;
+       state->mss_now = tcp_current_mss(sk);
+       skb = NULL;
         i = 0;
   
         if (!tp->sacked_out) {
@@@ -1801,7 -1768,6 +1768,6 @@@
                                 skb = tcp_highest_sack(sk);
                                 if (!skb)
                                         break;
-                               state->fack_count = tp->fackets_out;
                                 cache++;
                                 goto walk;
                         }
@@@ -1816,7 -1782,6 +1782,6 @@@
                         skb = tcp_highest_sack(sk);
                         if (!skb)
                                 break;
-                       state->fack_count = tp->fackets_out;
                 }
                 skb = tcp_sacktag_skip(skb, sk, state, start_seq);
   
@@@ -1836,9 -1801,8 +1801,8 @@@ advance_sp
         for (j = 0; j < used_sacks; j++)
                 tp->recv_sack_cache[i++] = sp[j];
   
-       if ((state->reord < tp->fackets_out) &&
-           ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
-               tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+       if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
+               tcp_check_sack_reordering(sk, state->reord, 0);
   
         tcp_verify_left_out(tp);
   out:
@@@ -1876,8 -1840,13 +1840,13 @@@ static bool tcp_limit_reno_sacked(struc
   static void tcp_check_reno_reordering(struct sock *sk, const int addend)
   {
         struct tcp_sock *tp = tcp_sk(sk);
-       if (tcp_limit_reno_sacked(tp))
-               tcp_update_reordering(sk, tp->packets_out + addend, 0);
+ 
+       if (!tcp_limit_reno_sacked(tp))
+               return;
+ 
+       tp->reordering = min_t(u32, tp->packets_out + addend,
+                              sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
   }
   
   /* Emulate SACKs for SACKless connection: account for a new dupack. */
@@@ -1923,7 -1892,6 +1892,6 @@@ void tcp_clear_retrans(struct tcp_sock 
         tp->lost_out = 0;
         tp->undo_marker = 0;
         tp->undo_retrans = -1;
-       tp->fackets_out = 0;
         tp->sacked_out = 0;
   }
   
@@@ -1968,19 -1936,15 +1936,15 @@@ void tcp_enter_loss(struct sock *sk
         if (tcp_is_reno(tp))
                 tcp_reset_reno_sack(tp);
   
-       skb = tcp_write_queue_head(sk);
+       skb = tcp_rtx_queue_head(sk);
         is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
         if (is_reneg) {
                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
                 tp->sacked_out = 0;
-               tp->fackets_out = 0;
         }
         tcp_clear_all_retrans_hints(tp);
   
-       tcp_for_write_queue(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
- 
+       skb_rbtree_walk_from(skb) {
                 mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
                              is_reneg);
                 if (mark_lost)
@@@ -2014,7 -1978,7 +1978,7 @@@
          * falsely raise the receive window, which results in repeated
          * timeouts and stop-and-go behavior.
          */
-       tp->frto = sysctl_tcp_frto &&
+       tp->frto = net->ipv4.sysctl_tcp_frto &&
                    (new_recovery || icsk->icsk_retransmits) &&
                    !inet_csk(sk)->icsk_mtup.probe_size;
   }
@@@ -2043,19 -2007,10 +2007,10 @@@ static bool tcp_check_sack_reneging(str
         return false;
   }
   
- static inline int tcp_fackets_out(const struct tcp_sock *tp)
- {
-       return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
- }
- 
   /* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
    * counter when SACK is enabled (without SACK, sacked_out is used for
    * that purpose).
    *
-  * Instead, with FACK TCP uses fackets_out that includes both SACKed
-  * segments up to the highest received SACK block so far and holes in
-  * between them.
-  *
    * With reordering, holes may still be in flight, so RFC3517 recovery
    * uses pure sacked_out (total number of SACKed segments) even though
    * it violates the RFC that uses duplicate ACKs, often these are equal
@@@ -2065,10 -2020,10 +2020,10 @@@
    */
   static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
   {
-       return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+       return tp->sacked_out + 1;
   }
   
- /* Linux NewReno/SACK/FACK/ECN state machine.
+ /* Linux NewReno/SACK/ECN state machine.
    * --------------------------------------
    *
    * "Open"     Normal state, no dubious events, fast path.
@@@ -2133,16 -2088,6 +2088,6 @@@
    *            dynamically measured and adjusted. This is implemented in
    *            tcp_rack_mark_lost.
    *
-  *            FACK (Disabled by default. Subsumbed by RACK):
-  *            It is the simplest heuristics. As soon as we decided
-  *            that something is lost, we decide that _all_ not SACKed
-  *            packets until the most forward SACK are lost. I.e.
-  *            lost_out = fackets_out - sacked_out and left_out = fackets_out.
-  *            It is absolutely correct estimate, if network does not reorder
-  *            packets. And it loses any connection to reality when reordering
-  *            takes place. We use FACK by default until reordering
-  *            is suspected on the path to this destination.
-  *
    *            If the receiver does not support SACK:
    *
    *            NewReno (RFC6582): in Recovery we assume that one segment
@@@ -2191,7 -2136,7 +2136,7 @@@ static bool tcp_time_to_recover(struct 
   }
   
   /* Detect loss in event "A" above by marking head of queue up as lost.
-  * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+  * For non-SACK(Reno) senders, the first "packets" number of segments
    * are considered lost. For RFC3517 SACK, a segment is considered lost if it
    * has at least tp->reordering SACKed seqments above it; "packets" refers to
    * the maximum SACKed segments to pass before reaching this limit.
@@@ -2206,20 -2151,18 +2151,18 @@@ static void tcp_mark_head_lost(struct s
         const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
   
         WARN_ON(packets > tp->packets_out);
-       if (tp->lost_skb_hint) {
-               skb = tp->lost_skb_hint;
-               cnt = tp->lost_cnt_hint;
+       skb = tp->lost_skb_hint;
+       if (skb) {
                 /* Head already handled? */
-               if (mark_head && skb != tcp_write_queue_head(sk))
+               if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
                         return;
+               cnt = tp->lost_cnt_hint;
         } else {
-               skb = tcp_write_queue_head(sk);
+               skb = tcp_rtx_queue_head(sk);
                 cnt = 0;
         }
   
-       tcp_for_write_queue_from(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
+       skb_rbtree_walk_from(skb) {
                 /* TODO: do this better */
                 /* this is not the most efficient way to do this... */
                 tp->lost_skb_hint = skb;
@@@ -2229,12 -2172,12 +2172,12 @@@
                         break;
   
                 oldcnt = cnt;
-               if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+               if (tcp_is_reno(tp) ||
                     (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
                         cnt += tcp_skb_pcount(skb);
   
                 if (cnt > packets) {
-                       if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+                       if (tcp_is_sack(tp) ||
                             (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
                             (oldcnt >= packets))
                                 break;
@@@ -2243,7 -2186,8 +2186,8 @@@
                         /* If needed, chop off the prefix to mark as lost. */
                         lost = (packets - oldcnt) * mss;
                         if (lost < skb->len &&
-                           tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+                           tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+                                        lost, mss, GFP_ATOMIC) < 0)
                                 break;
                         cnt = packets;
                 }
@@@ -2264,11 -2208,6 +2208,6 @@@ static void tcp_update_scoreboard(struc
   
         if (tcp_is_reno(tp)) {
                 tcp_mark_head_lost(sk, 1, 1);
-       } else if (tcp_is_fack(tp)) {
-               int lost = tp->fackets_out - tp->reordering;
-               if (lost <= 0)
-                       lost = 1;
-               tcp_mark_head_lost(sk, lost, 0);
         } else {
                 int sacked_upto = tp->sacked_out - tp->reordering;
                 if (sacked_upto >= 0)
@@@ -2327,16 -2266,16 +2266,16 @@@ static bool tcp_any_retrans_done(const 
         if (tp->retrans_out)
                 return true;
   
-       skb = tcp_write_queue_head(sk);
+       skb = tcp_rtx_queue_head(sk);
         if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
                 return true;
   
         return false;
   }
   
- #if FASTRETRANS_DEBUG > 1
   static void DBGUNDO(struct sock *sk, const char *msg)
   {
+ #if FASTRETRANS_DEBUG > 1
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_sock *inet = inet_sk(sk);
   
@@@ -2358,10 -2297,8 +2297,8 @@@
                          tp->packets_out);
         }
   #endif
- }
- #else
- #define DBGUNDO(x...) do { } while (0)
   #endif
+ }
   
   static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
   {
@@@ -2370,9 -2307,7 +2307,7 @@@
         if (unmark_loss) {
                 struct sk_buff *skb;
   
-               tcp_for_write_queue(skb, sk) {
-                       if (skb == tcp_send_head(sk))
-                               break;
+               skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
                         TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                 }
                 tp->lost_out = 0;
@@@ -2417,6 -2352,8 +2352,8 @@@ static bool tcp_try_undo_recovery(struc
                         mib_idx = LINUX_MIB_TCPFULLUNDO;
   
                 NET_INC_STATS(sock_net(sk), mib_idx);
+       } else if (tp->rack.reo_wnd_persist) {
+               tp->rack.reo_wnd_persist--;
         }
         if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
                 /* Hold old state until something *above* high_seq
@@@ -2436,6 -2373,8 +2373,8 @@@ static bool tcp_try_undo_dsack(struct s
         struct tcp_sock *tp = tcp_sk(sk);
   
         if (tp->undo_marker && !tp->undo_retrans) {
+               tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+                                              tp->rack.reo_wnd_persist + 1);
                 DBGUNDO(sk, "D-SACK");
                 tcp_undo_cwnd_reduction(sk, false);
                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
@@@ -2616,9 -2555,7 +2555,7 @@@ void tcp_simple_retransmit(struct sock 
         struct sk_buff *skb;
         unsigned int mss = tcp_current_mss(sk);
   
-       tcp_for_write_queue(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
+       skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
                 if (tcp_skb_seglen(skb) > mss &&
                     !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
                         if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@@ -2712,7 -2649,7 +2649,7 @@@ static void tcp_process_loss(struct soc
                          * is updated in tcp_ack()). Otherwise fall back to
                          * the conventional recovery.
                          */
-                       if (tcp_send_head(sk) &&
+                       if (!tcp_write_queue_empty(sk) &&
                             after(tcp_wnd_end(tp), tp->snd_nxt)) {
                                 *rexmit = REXMIT_NEW;
                                 return;
@@@ -2739,15 -2676,15 +2676,15 @@@
   }
   
   /* Undo during fast recovery after partial ACK. */
- static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
   {
         struct tcp_sock *tp = tcp_sk(sk);
   
         if (tp->undo_marker && tcp_packet_delayed(tp)) {
                 /* Plain luck! Hole if filled with delayed
-                * packet, rather than with a retransmit.
+                * packet, rather than with a retransmit. Check reordering.
                  */
-               tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+               tcp_check_sack_reordering(sk, prior_snd_una, 1);
   
                 /* We are getting evidence that the reordering degree is higher
                  * than we realized. If there are no retransmits out then we
@@@ -2774,7 -2711,7 +2711,7 @@@ static void tcp_rack_identify_loss(stru
         struct tcp_sock *tp = tcp_sk(sk);
   
         /* Use RACK to detect loss */
-       if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+       if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
                 u32 prior_retrans = tp->retrans_out;
   
                 tcp_rack_mark_lost(sk);
@@@ -2783,6 -2720,14 +2720,14 @@@
         }
   }
   
+ static bool tcp_force_fast_retransmit(struct sock *sk)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+ 
+       return after(tcp_highest_sack_seq(tp),
+                    tp->snd_una + tp->reordering * tp->mss_cache);
+ }
+ 
   /* Process an event, which can update packets-in-flight not trivially.
    * Main goal of this function is to calculate new estimate for left_out,
    * taking into account both packets sitting in receiver's buffer and
@@@ -2795,19 -2740,17 +2740,17 @@@
    * It does _not_ decide what to send, it is made in function
    * tcp_xmit_retransmit_queue().
    */
- static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
                                   bool is_dupack, int *ack_flag, int *rexmit)
   {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         int fast_rexmit = 0, flag = *ack_flag;
         bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
-                                   (tcp_fackets_out(tp) > tp->reordering));
+                                    tcp_force_fast_retransmit(sk));
   
-       if (WARN_ON(!tp->packets_out && tp->sacked_out))
+       if (!tp->packets_out && tp->sacked_out)
                 tp->sacked_out = 0;
-       if (WARN_ON(!tp->sacked_out && tp->fackets_out))
-               tp->fackets_out = 0;
   
         /* Now state machine starts.
          * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@@ -2854,11 -2797,11 +2797,11 @@@
                         if (tcp_is_reno(tp) && is_dupack)
                                 tcp_add_reno_sack(sk);
                 } else {
-                       if (tcp_try_undo_partial(sk, acked))
+                       if (tcp_try_undo_partial(sk, prior_snd_una))
                                 return;
                         /* Partial ACK arrived. Force fast retransmit. */
                         do_lost = tcp_is_reno(tp) ||
-                                 tcp_fackets_out(tp) > tp->reordering;
+                                 tcp_force_fast_retransmit(sk);
                 }
                 if (tcp_try_undo_dsack(sk)) {
                         tcp_try_keep_open(sk);
@@@ -2873,6 -2816,7 +2816,7 @@@
                       (*ack_flag & FLAG_LOST_RETRANS)))
                         return;
                 /* Change state if cwnd is undone or retransmits are lost */
+               /* fall through */
         default:
                 if (tcp_is_reno(tp)) {
                         if (flag & FLAG_SND_UNA_ADVANCED)
@@@ -2913,8 -2857,8 +2857,8 @@@
   
   static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
   {
+       u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
         struct tcp_sock *tp = tcp_sk(sk);
-       u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
   
         minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
                            rtt_us ? : jiffies_to_usecs(1));
@@@ -3056,28 -3000,31 +3000,31 @@@ static void tcp_ack_tstamp(struct sock 
   
         shinfo = skb_shinfo(skb);
         if (!before(shinfo->tskey, prior_snd_una) &&
-           before(shinfo->tskey, tcp_sk(sk)->snd_una))
-               __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+           before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
+               tcp_skb_tsorted_save(skb) {
+                       __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+               } tcp_skb_tsorted_restore(skb);
+       }
   }
   
   /* Remove acknowledged frames from the retransmission queue. If our packet
    * is before the ack sequence we can discard it as it's confirmed to have
    * arrived at the other end.
    */
- static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-                              u32 prior_snd_una, int *acked,
+ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+                              u32 prior_snd_una,
                                struct tcp_sacktag_state *sack)
   {
         const struct inet_connection_sock *icsk = inet_csk(sk);
         u64 first_ackt, last_ackt;
         struct tcp_sock *tp = tcp_sk(sk);
         u32 prior_sacked = tp->sacked_out;
-       u32 reord = tp->packets_out;
+       u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
+       struct sk_buff *skb, *next;
         bool fully_acked = true;
         long sack_rtt_us = -1L;
         long seq_rtt_us = -1L;
         long ca_rtt_us = -1L;
-       struct sk_buff *skb;
         u32 pkts_acked = 0;
         u32 last_in_flight = 0;
         bool rtt_update;
@@@ -3085,8 -3032,9 +3032,9 @@@
   
         first_ackt = 0;
   
-       while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+       for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
                 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+               const u32 start_seq = scb->seq;
                 u8 sacked = scb->sacked;
                 u32 acked_pcount;
   
@@@ -3103,8 -3051,6 +3051,6 @@@
                                 break;
                         fully_acked = false;
                 } else {
-                       /* Speedup tcp_unlink_write_queue() and next loop */
-                       prefetchw(skb->next);
                         acked_pcount = tcp_skb_pcount(skb);
                 }
   
@@@ -3119,7 -3065,8 +3065,8 @@@
                                 first_ackt = last_ackt;
   
                         last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
-                       reord = min(pkts_acked, reord);
+                       if (before(start_seq, reord))
+                               reord = start_seq;
                         if (!after(scb->end_seq, tp->high_seq))
                                 flag |= FLAG_ORIG_SACK_ACKED;
                 }
@@@ -3156,12 -3103,12 +3103,12 @@@
                 if (!fully_acked)
                         break;
   
-               tcp_unlink_write_queue(skb, sk);
-               sk_wmem_free_skb(sk, skb);
+               next = skb_rb_next(skb);
                 if (unlikely(skb == tp->retransmit_skb_hint))
                         tp->retransmit_skb_hint = NULL;
                 if (unlikely(skb == tp->lost_skb_hint))
                         tp->lost_skb_hint = NULL;
+               tcp_rtx_queue_unlink_and_free(skb, sk);
         }
   
         if (!skb)
@@@ -3197,16 -3144,12 +3144,12 @@@
                         int delta;
   
                         /* Non-retransmitted hole got filled? That's reordering */
-                       if (reord < prior_fackets && reord <= tp->fackets_out)
-                               tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+                       if (before(reord, prior_fack))
+                               tcp_check_sack_reordering(sk, reord, 0);
   
-                       delta = tcp_is_fack(tp) ? pkts_acked :
-                                                 prior_sacked - tp->sacked_out;
+                       delta = prior_sacked - tp->sacked_out;
                         tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
                 }
- 
-               tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- 
         } else if (skb && rtt_update && sack_rtt_us >= 0 &&
                    sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
                 /* Do not re-arm RTO if the sack RTT is measured from data sent
@@@ -3247,18 -3190,19 +3190,19 @@@
                 }
         }
   #endif
-       *acked = pkts_acked;
         return flag;
   }
   
   static void tcp_ack_probe(struct sock *sk)
   {
-       const struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
+       struct sk_buff *head = tcp_send_head(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
   
         /* Was it a usable window open? */
- 
-       if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+       if (!head)
+               return;
+       if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
                 icsk->icsk_backoff = 0;
                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
                 /* Socket must be waked up by subsequent tcp_data_snd_check().
@@@ -3378,7 -3322,7 +3322,7 @@@ static int tcp_ack_update_window(struc
                         tp->pred_flags = 0;
                         tcp_fast_path_check(sk);
   
-                       if (tcp_send_head(sk))
+                       if (!tcp_write_queue_empty(sk))
                                 tcp_slow_start_after_idle_check(sk);
   
                         if (nwin > tp->max_window) {
@@@ -3399,7 -3343,7 +3343,7 @@@ static bool __tcp_oow_rate_limited(stru
         if (*last_oow_ack_time) {
                 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
   
-               if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+               if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
                         NET_INC_STATS(net, mib_idx);
                         return true;    /* rate-limited: don't send yet! */
                 }
@@@ -3435,10 -3379,11 +3379,11 @@@ static void tcp_send_challenge_ack(stru
         static u32 challenge_timestamp;
         static unsigned int challenge_count;
         struct tcp_sock *tp = tcp_sk(sk);
+       struct net *net = sock_net(sk);
         u32 count, now;
   
         /* First check our per-socket dupack rate limit. */
-       if (__tcp_oow_rate_limited(sock_net(sk),
+       if (__tcp_oow_rate_limited(net,
                                    LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
                                    &tp->last_oow_ack_time))
                 return;
@@@ -3446,16 -3391,16 +3391,16 @@@
         /* Then check host-wide RFC 5961 rate limit. */
         now = jiffies / HZ;
         if (now != challenge_timestamp) {
-               u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+               u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+               u32 half = (ack_limit + 1) >> 1;
   
                 challenge_timestamp = now;
-               WRITE_ONCE(challenge_count, half +
-                          prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+               WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
         }
         count = READ_ONCE(challenge_count);
         if (count > 0) {
                 WRITE_ONCE(challenge_count, count - 1);
-               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+               NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
                 tcp_send_ack(sk);
         }
   }
@@@ -3553,18 -3498,17 +3498,17 @@@ static int tcp_ack(struct sock *sk, con
         u32 ack_seq = TCP_SKB_CB(skb)->seq;
         u32 ack = TCP_SKB_CB(skb)->ack_seq;
         bool is_dupack = false;
-       u32 prior_fackets;
         int prior_packets = tp->packets_out;
         u32 delivered = tp->delivered;
         u32 lost = tp->lost;
-       int acked = 0; /* Number of packets newly acked */
         int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+       u32 prior_fack;
   
         sack_state.first_sackt = 0;
         sack_state.rate = &rs;
   
-       /* We very likely will need to access write queue head. */
-       prefetchw(sk->sk_write_queue.next);
+       /* We very likely will need to access rtx queue. */
+       prefetch(sk->tcp_rtx_queue.rb_node);
   
         /* If the ack is older than previous acks
          * then we can probably ignore it.
@@@ -3590,7 -3534,7 +3534,7 @@@
                 icsk->icsk_retransmits = 0;
         }
   
-       prior_fackets = tp->fackets_out;
+       prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
         rs.prior_in_flight = tcp_packets_in_flight(tp);
   
         /* ts_recent update must be made after we are sure that the packet
@@@ -3646,8 -3590,9 +3590,9 @@@
                 goto no_queue;
   
         /* See if we can take anything off of the retransmit queue. */
-       flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
-                                   &sack_state);
+       flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+ 
+       tcp_rack_update_reo_wnd(sk, &rs);
   
         if (tp->tlp_high_seq)
                 tcp_process_tlp_ack(sk, ack, flag);
@@@ -3657,7 -3602,8 +3602,8 @@@
   
         if (tcp_ack_is_dubious(sk, flag)) {
                 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+                                     &rexmit);
         }
   
         if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
@@@ -3673,13 -3619,13 +3619,13 @@@
   no_queue:
         /* If data was DSACKed, see if we can undo a cwnd reduction. */
         if (flag & FLAG_DSACKING_ACK)
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+                                     &rexmit);
         /* If this ack opens up a zero window, clear backoff.  It was
          * being used to time the probes, and is probably far higher than
          * it needs to be for normal retransmission.
          */
-       if (tcp_send_head(sk))
-               tcp_ack_probe(sk);
+       tcp_ack_probe(sk);
   
         if (tp->tlp_high_seq)
                 tcp_process_tlp_ack(sk, ack, flag);
@@@ -3696,7 -3642,8 +3642,8 @@@ old_ack
         if (TCP_SKB_CB(skb)->sacked) {
                 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
                                                 &sack_state);
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+                                     &rexmit);
                 tcp_xmit_recovery(sk, rexmit);
         }
   
@@@ -3721,6 -3668,21 +3668,21 @@@ static void tcp_parse_fastopen_option(i
         foc->exp = exp_opt;
   }
   
+ static void smc_parse_options(const struct tcphdr *th,
+                             struct tcp_options_received *opt_rx,
+                             const unsigned char *ptr,
+                             int opsize)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (th->syn && !(opsize & 1) &&
+                   opsize >= TCPOLEN_EXP_SMC_BASE &&
+                   get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+                       opt_rx->smc_ok = 1;
+       }
+ #endif
+ }
+ 
   /* Look for tcp options. Normally only called on SYN and SYNACK packets.
    * But, this can also be called on packets in the established flow when
    * the fast version below fails.
@@@ -3828,6 -3790,9 +3790,9 @@@ void tcp_parse_options(const struct ne
                                         tcp_parse_fastopen_option(opsize -
                                                 TCPOLEN_EXP_FASTOPEN_BASE,
                                                 ptr + 2, th->syn, foc, true);
+                               else
+                                       smc_parse_options(th, opt_rx, ptr,
+                                                         opsize);
                                 break;
   
                         }
@@@ -3995,6 -3960,8 +3960,8 @@@ static inline bool tcp_sequence(const s
   /* When we get a reset we do this. */
   void tcp_reset(struct sock *sk)
   {
+       trace_tcp_receive_reset(sk);
+ 
         /* We want the right error as BSD sees it (and indeed as we do). */
         switch (sk->sk_state) {
         case TCP_SYN_SENT:
@@@ -4117,7 -4084,7 +4084,7 @@@ static void tcp_dsack_set(struct sock *
   {
         struct tcp_sock *tp = tcp_sk(sk);
   
-       if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+       if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
                 int mib_idx;
   
                 if (before(seq, tp->rcv_nxt))
@@@ -4152,7 -4119,7 +4119,7 @@@ static void tcp_send_dupack(struct soc
                 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
                 tcp_enter_quickack_mode(sk);
   
-               if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+               if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
                         u32 end_seq = TCP_SKB_CB(skb)->end_seq;
   
                         if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@@ -4268,11 -4235,6 +4235,6 @@@ static void tcp_sack_remove(struct tcp_
         tp->rx_opt.num_sacks = num_sacks;
   }
   
- enum tcp_queue {
-       OOO_QUEUE,
-       RCV_QUEUE,
- };
- 
   /**
    * tcp_try_coalesce - try to merge skb to prior one
    * @sk: socket
@@@ -4288,7 -4250,6 +4250,6 @@@
    * Returns true if caller should free @from instead of queueing it
    */
   static bool tcp_try_coalesce(struct sock *sk,
-                            enum tcp_queue dest,
                              struct sk_buff *to,
                              struct sk_buff *from,
                              bool *fragstolen)
@@@ -4313,10 -4274,7 +4274,7 @@@
   
         if (TCP_SKB_CB(from)->has_rxtstamp) {
                 TCP_SKB_CB(to)->has_rxtstamp = true;
-               if (dest == OOO_QUEUE)
-                       TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
-               else
-                       to->tstamp = from->tstamp;
+               to->tstamp = from->tstamp;
         }
   
         return true;
@@@ -4341,7 -4299,7 +4299,7 @@@ static void tcp_ofo_queue(struct sock *
   
         p = rb_first(&tp->out_of_order_queue);
         while (p) {
-               skb = rb_entry(p, struct sk_buff, rbnode);
+               skb = rb_to_skb(p);
                 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
                         break;
   
@@@ -4353,9 -4311,6 +4311,6 @@@
                 }
                 p = rb_next(p);
                 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
-               /* Replace tstamp which was stomped by rbnode */
-               if (TCP_SKB_CB(skb)->has_rxtstamp)
-                       skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
   
                 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
                         SOCK_DEBUG(sk, "ofo packet was already received\n");
@@@ -4367,8 -4322,7 +4322,7 @@@
                            TCP_SKB_CB(skb)->end_seq);
   
                 tail = skb_peek_tail(&sk->sk_receive_queue);
-               eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
-                                                tail, skb, &fragstolen);
+               eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
                 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
                 if (!eaten)
@@@ -4409,7 -4363,7 +4363,7 @@@ static int tcp_try_rmem_schedule(struc
   static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
   {
         struct tcp_sock *tp = tcp_sk(sk);
-       struct rb_node **p, *q, *parent;
+       struct rb_node **p, *parent;
         struct sk_buff *skb1;
         u32 seq, end_seq;
         bool fragstolen;
@@@ -4422,10 -4376,6 +4376,6 @@@
                 return;
         }
   
-       /* Stash tstamp to avoid being stomped on by rbnode */
-       if (TCP_SKB_CB(skb)->has_rxtstamp)
-               TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
- 
         /* Disable header prediction. */
         tp->pred_flags = 0;
         inet_csk_schedule_ack(sk);
@@@ -4453,7 -4403,7 +4403,7 @@@
         /* In the typical case, we are adding an skb to the end of the list.
          * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
          */
-       if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+       if (tcp_try_coalesce(sk, tp->ooo_last_skb,
                              skb, &fragstolen)) {
   coalesce_done:
                 tcp_grow_window(sk, skb);
@@@ -4472,7 -4422,7 +4422,7 @@@
         parent = NULL;
         while (*p) {
                 parent = *p;
-               skb1 = rb_entry(parent, struct sk_buff, rbnode);
+               skb1 = rb_to_skb(parent);
                 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
                         p = &parent->rb_left;
                         continue;
@@@ -4504,7 -4454,7 +4454,7 @@@
                                 __kfree_skb(skb1);
                                 goto merge_right;
                         }
-               } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+               } else if (tcp_try_coalesce(sk, skb1,
                                             skb, &fragstolen)) {
                         goto coalesce_done;
                 }
@@@ -4517,9 -4467,7 +4467,7 @@@ insert
   
   merge_right:
         /* Remove other segments covered by skb. */
-       while ((q = rb_next(&skb->rbnode)) != NULL) {
-               skb1 = rb_entry(q, struct sk_buff, rbnode);
- 
+       while ((skb1 = skb_rb_next(skb)) != NULL) {
                 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
                         break;
                 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@@ -4534,7 -4482,7 +4482,7 @@@
                 tcp_drop(sk, skb1);
         }
         /* If there is no skb after us, we are the last_skb ! */
-       if (!q)
+       if (!skb1)
                 tp->ooo_last_skb = skb;
   
   add_sack:
@@@ -4556,7 -4504,7 +4504,7 @@@ static int __must_check tcp_queue_rcv(s
   
         __skb_pull(skb, hdrlen);
         eaten = (tail &&
-                tcp_try_coalesce(sk, RCV_QUEUE, tail,
+                tcp_try_coalesce(sk, tail,
                                   skb, fragstolen)) ? 1 : 0;
         tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
         if (!eaten) {
@@@ -4720,7 -4668,7 +4668,7 @@@ static struct sk_buff *tcp_skb_next(str
         if (list)
                 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
   
-       return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+       return skb_rb_next(skb);
   }
   
   static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@@ -4741,7 -4689,7 +4689,7 @@@
   }
   
   /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
- static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+ void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
   {
         struct rb_node **p = &root->rb_node;
         struct rb_node *parent = NULL;
@@@ -4749,7 -4697,7 +4697,7 @@@
   
         while (*p) {
                 parent = *p;
-               skb1 = rb_entry(parent, struct sk_buff, rbnode);
+               skb1 = rb_to_skb(parent);
                 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
                         p = &parent->rb_left;
                 else
@@@ -4796,7 -4744,7 +4744,7 @@@ restart
                  *   overlaps to the next one.
                  */
                 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
-                   (tcp_win_from_space(skb->truesize) > skb->len ||
+                   (tcp_win_from_space(sk, skb->truesize) > skb->len ||
                      before(TCP_SKB_CB(skb)->seq, start))) {
                         end_of_skbs = false;
                         break;
@@@ -4868,26 -4816,19 +4816,19 @@@ static void tcp_collapse_ofo_queue(stru
   {
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb, *head;
-       struct rb_node *p;
         u32 start, end;
   
-       p = rb_first(&tp->out_of_order_queue);
-       skb = rb_entry_safe(p, struct sk_buff, rbnode);
+       skb = skb_rb_first(&tp->out_of_order_queue);
   new_range:
         if (!skb) {
-               p = rb_last(&tp->out_of_order_queue);
-               /* Note: This is possible p is NULL here. We do not
-                * use rb_entry_safe(), as ooo_last_skb is valid only
-                * if rbtree is not empty.
-                */
-               tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+               tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
                 return;
         }
         start = TCP_SKB_CB(skb)->seq;
         end = TCP_SKB_CB(skb)->end_seq;
   
         for (head = skb;;) {
-               skb = tcp_skb_next(skb, NULL);
+               skb = skb_rb_next(skb);
   
                 /* Range is terminated when we see a gap or when
                  * we are at the queue end.
@@@ -4930,14 -4871,14 +4871,14 @@@ static bool tcp_prune_ofo_queue(struct 
         do {
                 prev = rb_prev(node);
                 rb_erase(node, &tp->out_of_order_queue);
-               tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+               tcp_drop(sk, rb_to_skb(node));
                 sk_mem_reclaim(sk);
                 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
                     !tcp_under_memory_pressure(sk))
                         break;
                 node = prev;
         } while (node);
-       tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+       tp->ooo_last_skb = rb_to_skb(prev);
   
         /* Reset SACK state.  A conforming SACK implementation will
          * do the same at a timeout based retransmit.  When a connection
@@@ -5112,7 -5053,7 +5053,7 @@@ static void tcp_check_urg(struct sock *
         struct tcp_sock *tp = tcp_sk(sk);
         u32 ptr = ntohs(th->urg_ptr);
   
-       if (ptr && !sysctl_tcp_stdurg)
+       if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
                 ptr--;
         ptr += ntohl(th->seq);
   
@@@ -5532,20 -5473,13 +5473,13 @@@ void tcp_finish_connect(struct sock *sk
                 security_inet_conn_established(sk, skb);
         }
   
-       /* Make sure socket is routed, for correct metrics.  */
-       icsk->icsk_af_ops->rebuild_header(sk);
- 
-       tcp_init_metrics(sk);
-       tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
-       tcp_init_congestion_control(sk);
+       tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
   
         /* Prevent spurious tcp_cwnd_restart() on first data
          * packet.
          */
         tp->lsndtime = tcp_jiffies32;
   
-       tcp_init_buffer_space(sk);
- 
         if (sock_flag(sk, SOCK_KEEPOPEN))
                 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
   
@@@ -5559,7 -5493,7 +5493,7 @@@ static bool tcp_rcv_fastopen_synack(str
                                     struct tcp_fastopen_cookie *cookie)
   {
         struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+       struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
         u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
         bool syn_drop = false;
   
@@@ -5594,9 -5528,8 +5528,8 @@@
         tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
   
         if (data) { /* Retransmit unacked data in SYN */
-               tcp_for_write_queue_from(data, sk) {
-                       if (data == tcp_send_head(sk) ||
-                           __tcp_retransmit_skb(sk, data, 1))
+               skb_rbtree_walk_from(data) {
+                       if (__tcp_retransmit_skb(sk, data, 1))
                                 break;
                 }
                 tcp_rearm_rto(sk);
@@@ -5614,6 -5547,16 +5547,16 @@@
         return false;
   }
   
+ static void smc_check_reset_syn(struct tcp_sock *tp)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (tp->syn_smc && !tp->rx_opt.smc_ok)
+                       tp->syn_smc = 0;
+       }
+ #endif
+ }
+ 
   static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                                          const struct tcphdr *th)
   {
@@@ -5709,10 -5652,6 +5652,6 @@@
                         tp->tcp_header_len = sizeof(struct tcphdr);
                 }
   
-               if (tcp_is_sack(tp) && sysctl_tcp_fack)
-                       tcp_enable_fack(tp);
- 
-               tcp_mtup_init(sk);
                 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                 tcp_initialize_rcv_mss(sk);
   
@@@ -5721,6 -5660,8 +5660,8 @@@
                  * is initialized. */
                 tp->copied_seq = tp->rcv_nxt;
   
+               smc_check_reset_syn(tp);
+ 
                 smp_mb();
   
                 tcp_finish_connect(sk, skb);
@@@ -5938,15 -5879,18 +5879,18 @@@ int tcp_rcv_state_process(struct sock *
                 if (req) {
                         inet_csk(sk)->icsk_retransmits = 0;
                         reqsk_fastopen_remove(sk, req, false);
+                       /* Re-arm the timer because data may have been sent out.
+                        * This is similar to the regular data transmission case
+                        * when new data has just been ack'ed.
+                        *
+                        * (TFO) - we could try to be more aggressive and
+                        * retransmitting any data sooner based on when they
+                        * are sent out.
+                        */
+                       tcp_rearm_rto(sk);
                 } else {
-                       /* Make sure socket is routed, for correct metrics. */
-                       icsk->icsk_af_ops->rebuild_header(sk);
-                       tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
-                       tcp_init_congestion_control(sk);
- 
-                       tcp_mtup_init(sk);
+                       tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
                         tp->copied_seq = tp->rcv_nxt;
-                       tcp_init_buffer_space(sk);
                 }
                 smp_mb();
                 tcp_set_state(sk, TCP_ESTABLISHED);
@@@ -5966,19 -5910,6 +5910,6 @@@
                 if (tp->rx_opt.tstamp_ok)
                         tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
   
-               if (req) {
-                       /* Re-arm the timer because data may have been sent out.
-                        * This is similar to the regular data transmission case
-                        * when new data has just been ack'ed.
-                        *
-                        * (TFO) - we could try to be more aggressive and
-                        * retransmitting any data sooner based on when they
-                        * are sent out.
-                        */
-                       tcp_rearm_rto(sk);
-               } else
-                       tcp_init_metrics(sk);
- 
                 if (!inet_csk(sk)->icsk_ca_ops->cong_control)
                         tcp_update_pacing_rate(sk);
   
@@@ -6075,6 -6006,7 +6006,7 @@@
         case TCP_LAST_ACK:
                 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
                         break;
+               /* fall through */
         case TCP_FIN_WAIT1:
         case TCP_FIN_WAIT2:
                 /* RFC 793 says to queue data in these states,
@@@ -6183,6 -6115,9 +6115,9 @@@ static void tcp_openreq_init(struct req
         ireq->ir_rmt_port = tcp_hdr(skb)->source;
         ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
         ireq->ir_mark = inet_request_mark(sk, skb);
+ #if IS_ENABLED(CONFIG_SMC)
+       ireq->smc_ok = rx_opt->smc_ok;
+ #endif
   }
   
   struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
@@@ -6358,7 -6293,7 +6293,7 @@@ int tcp_conn_request(struct request_soc
         tcp_openreq_init_rwin(req, sk, dst);
         if (!want_cookie) {
                 tcp_reqsk_record_syn(sk, req, skb);
-               fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
+               fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
         }
         if (fastopen_sk) {
                 af_ops->send_synack(fastopen_sk, dst, &fl, req,
diff --combined net/ipv4/tcp_output.c

index 5a42e873d44a8f880d8999e911d64d6f388865fb,76dbe884f2469660028684a46fc19afa000a1353..540b7d92cc70b3ea4f91ecb307840166f7f4dbce
--- 1/net/ipv4/tcp_output.c
--- 2/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@@ -41,40 -41,25 +41,25 @@@
   #include <linux/compiler.h>
   #include <linux/gfp.h>
   #include <linux/module.h>
+ #include <linux/static_key.h>
   
- /* People can turn this off for buggy TCP's found in printers etc. */
- int sysctl_tcp_retrans_collapse __read_mostly = 1;
- 
- /* People can turn this on to work with those rare, broken TCPs that
-  * interpret the window field as a signed quantity.
-  */
- int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
- 
- /* Default TSQ limit of four TSO segments */
- int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
- 
- /* This limits the percentage of the congestion window which we
-  * will allow a single TSO frame to consume.  Building TSO frames
-  * which are too large can cause TCP streams to be bursty.
-  */
- int sysctl_tcp_tso_win_divisor __read_mostly = 3;
- 
- /* By default, RFC2861 behavior.  */
- int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+ #include <trace/events/tcp.h>
   
   static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                            int push_one, gfp_t gfp);
   
   /* Account for new data that has been sent to the network. */
- static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
   {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         unsigned int prior_packets = tp->packets_out;
   
-       tcp_advance_send_head(sk, skb);
         tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
   
+       __skb_unlink(skb, &sk->sk_write_queue);
+       tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+ 
         tp->packets_out += tcp_skb_pcount(skb);
         if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
                 tcp_rearm_rto(sk);
@@@ -203,7 -188,7 +188,7 @@@ u32 tcp_default_init_rwnd(u32 mss
    * be a multiple of mss if possible. We assume here that mss >= 1.
    * This MUST be enforced by all callers.
    */
- void tcp_select_initial_window(int __space, __u32 mss,
+ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
                                __u32 *rcv_wnd, __u32 *window_clamp,
                                int wscale_ok, __u8 *rcv_wscale,
                                __u32 init_rcv_wnd)
@@@ -227,7 -212,7 +212,7 @@@
          * which we interpret as a sign the remote TCP is not
          * misinterpreting the window field as a signed quantity.
          */
-       if (sysctl_tcp_workaround_signed_windows)
+       if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
                 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
         else
                 (*rcv_wnd) = space;
@@@ -235,7 -220,7 +220,7 @@@
         (*rcv_wscale) = 0;
         if (wscale_ok) {
                 /* Set window scaling on max possible window */
-               space = max_t(u32, space, sysctl_tcp_rmem[2]);
+               space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
                 space = max_t(u32, space, sysctl_rmem_max);
                 space = min_t(u32, space, *window_clamp);
                 while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
@@@ -287,7 -272,8 +272,8 @@@ static u16 tcp_select_window(struct soc
         /* Make sure we do not exceed the maximum possible
          * scaled window.
          */
-       if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
+       if (!tp->rx_opt.rcv_wscale &&
+           sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
                 new_win = min(new_win, MAX_TCP_WINDOW);
         else
                 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@@ -395,7 -381,6 +381,6 @@@ static void tcp_ecn_send(struct sock *s
   static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
   {
         skb->ip_summed = CHECKSUM_PARTIAL;
-       skb->csum = 0;
   
         TCP_SKB_CB(skb)->tcp_flags = flags;
         TCP_SKB_CB(skb)->sacked = 0;
@@@ -418,6 -403,22 +403,22 @@@ static inline bool tcp_urg_mode(const s
   #define OPTION_MD5            (1 << 2)
   #define OPTION_WSCALE         (1 << 3)
   #define OPTION_FAST_OPEN_COOKIE       (1 << 8)
+ #define OPTION_SMC            (1 << 9)
+ 
+ static void smc_options_write(__be32 *ptr, u16 *options)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (unlikely(OPTION_SMC & *options)) {
+                       *ptr++ = htonl((TCPOPT_NOP  << 24) |
+                                      (TCPOPT_NOP  << 16) |
+                                      (TCPOPT_EXP <<  8) |
+                                      (TCPOLEN_EXP_SMC_BASE));
+                       *ptr++ = htonl(TCPOPT_SMC_MAGIC);
+               }
+       }
+ #endif
+ }
   
   struct tcp_out_options {
         u16 options;            /* bit field of OPTION_* */
@@@ -536,6 -537,41 +537,41 @@@ static void tcp_options_write(__be32 *p
                 }
                 ptr += (len + 3) >> 2;
         }
+ 
+       smc_options_write(ptr, &options);
+ }
+ 
+ static void smc_set_option(const struct tcp_sock *tp,
+                          struct tcp_out_options *opts,
+                          unsigned int *remaining)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (tp->syn_smc) {
+                       if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+                               opts->options |= OPTION_SMC;
+                               *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+                       }
+               }
+       }
+ #endif
+ }
+ 
+ static void smc_set_option_cond(const struct tcp_sock *tp,
+                               const struct inet_request_sock *ireq,
+                               struct tcp_out_options *opts,
+                               unsigned int *remaining)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (tp->syn_smc && ireq->smc_ok) {
+                       if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+                               opts->options |= OPTION_SMC;
+                               *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+                       }
+               }
+       }
+ #endif
   }
   
   /* Compute TCP options for SYN packets. This is not the final
@@@ -603,11 -639,14 +639,14 @@@ static unsigned int tcp_syn_options(str
                 }
         }
   
+       smc_set_option(tp, opts, &remaining);
+ 
         return MAX_TCP_OPTION_SPACE - remaining;
   }
   
   /* Set up TCP options for SYN-ACKs. */
- static unsigned int tcp_synack_options(struct request_sock *req,
+ static unsigned int tcp_synack_options(const struct sock *sk,
+                                      struct request_sock *req,
                                        unsigned int mss, struct sk_buff *skb,
                                        struct tcp_out_options *opts,
                                        const struct tcp_md5sig_key *md5,
@@@ -663,6 -702,8 +702,8 @@@
                 }
         }
   
+       smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+ 
         return MAX_TCP_OPTION_SPACE - remaining;
   }
   
@@@ -973,6 -1014,12 +1014,12 @@@ static void tcp_internal_pacing(struct 
                       HRTIMER_MODE_ABS_PINNED);
   }
   
+ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+ {
+       skb->skb_mstamp = tp->tcp_mstamp;
+       list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+ }
+ 
   /* This routine actually transmits TCP packets queued in by
    * tcp_do_sendmsg().  This is used by both the initial
    * transmission and possible later retransmissions.
@@@ -1005,10 -1052,14 +1052,14 @@@ static int tcp_transmit_skb(struct soc
                 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
                         - tp->snd_una;
                 oskb = skb;
-               if (unlikely(skb_cloned(skb)))
-                       skb = pskb_copy(skb, gfp_mask);
-               else
-                       skb = skb_clone(skb, gfp_mask);
+ 
+               tcp_skb_tsorted_save(oskb) {
+                       if (unlikely(skb_cloned(oskb)))
+                               skb = pskb_copy(oskb, gfp_mask);
+                       else
+                               skb = skb_clone(oskb, gfp_mask);
+               } tcp_skb_tsorted_restore(oskb);
+ 
                 if (unlikely(!skb))
                         return -ENOBUFS;
         }
@@@ -1129,7 -1180,7 +1180,7 @@@
                 err = net_xmit_eval(err);
         }
         if (!err && oskb) {
-               oskb->skb_mstamp = tp->tcp_mstamp;
+               tcp_update_skb_after_send(tp, oskb);
                 tcp_rate_skb_sent(sk, oskb);
         }
         return err;
@@@ -1167,21 -1218,6 +1218,6 @@@ static void tcp_set_skb_tso_segs(struc
         }
   }
   
- /* When a modification to fackets out becomes necessary, we need to check
-  * skb is counted to fackets_out or not.
-  */
- static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
-                                  int decr)
- {
-       struct tcp_sock *tp = tcp_sk(sk);
- 
-       if (!tp->sacked_out || tcp_is_reno(tp))
-               return;
- 
-       if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
-               tp->fackets_out -= decr;
- }
- 
   /* Pcount in the middle of the write queue got changed, we need to do various
    * tweaks to fix counters
    */
@@@ -1202,11 -1238,9 +1238,9 @@@ static void tcp_adjust_pcount(struct so
         if (tcp_is_reno(tp) && decr > 0)
                 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
   
-       tcp_adjust_fackets_out(sk, skb, decr);
- 
         if (tp->lost_skb_hint &&
             before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
-           (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
+           (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
                 tp->lost_cnt_hint -= decr;
   
         tcp_verify_left_out(tp);
@@@ -1241,12 -1275,25 +1275,25 @@@ static void tcp_skb_fragment_eor(struc
         TCP_SKB_CB(skb)->eor = 0;
   }
   
+ /* Insert buff after skb on the write or rtx queue of sk.  */
+ static void tcp_insert_write_queue_after(struct sk_buff *skb,
+                                        struct sk_buff *buff,
+                                        struct sock *sk,
+                                        enum tcp_queue tcp_queue)
+ {
+       if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
+               __skb_queue_after(&sk->sk_write_queue, skb, buff);
+       else
+               tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+ }
+ 
   /* Function to create two new TCP segments.  Shrinks the given segment
    * to the specified size and appends a new segment with the rest of the
    * packet to the list.  This won't be called frequently, I hope.
    * Remember, these are still headerless SKBs at this point.
    */
- int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+                struct sk_buff *skb, u32 len,
                  unsigned int mss_now, gfp_t gfp)
   {
         struct tcp_sock *tp = tcp_sk(sk);
@@@ -1329,7 -1376,9 +1376,9 @@@
   
         /* Link BUFF into the send queue. */
         __skb_header_release(buff);
-       tcp_insert_write_queue_after(skb, buff, sk);
+       tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+       if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
+               list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
   
         return 0;
   }
@@@ -1607,7 -1656,7 +1656,7 @@@ static void tcp_cwnd_validate(struct so
                 if (tp->packets_out > tp->snd_cwnd_used)
                         tp->snd_cwnd_used = tp->packets_out;
   
-               if (sysctl_tcp_slow_start_after_idle &&
+               if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
                     (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
                     !ca_ops->cong_control)
                         tcp_cwnd_application_limited(sk);
@@@ -1616,10 -1665,10 +1665,10 @@@
                  * is caused by insufficient sender buffer:
                  * 1) just sent some data (see tcp_write_xmit)
                  * 2) not cwnd limited (this else condition)
-                * 3) no more data to send (null tcp_send_head )
+                * 3) no more data to send (tcp_write_queue_empty())
                  * 4) application is hitting buffer limit (SOCK_NOSPACE)
                  */
-               if (!tcp_send_head(sk) && sk->sk_socket &&
+               if (tcp_write_queue_empty(sk) && sk->sk_socket &&
                     test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
                     (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
                         tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@@ -1671,7 -1720,7 +1720,7 @@@ u32 tcp_tso_autosize(const struct sock 
   {
         u32 bytes, segs;
   
-       bytes = min(sk->sk_pacing_rate >> 10,
+       bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
                     sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
   
         /* Goal is to send at least one packet per ms,
@@@ -1694,7 -1743,8 +1743,8 @@@ static u32 tcp_tso_segs(struct sock *sk
         u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
   
         return tso_segs ? :
-               tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+               tcp_tso_autosize(sk, mss_now,
+                                sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
   }
   
   /* Returns the portion of skb which can be sent right away */
@@@ -1815,7 -1865,8 +1865,8 @@@ static bool tcp_snd_wnd_test(const stru
    * know that all the data is in scatter-gather pages, and that the
    * packet has never been sent out before (and thus is not cloned).
    */
- static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+                       struct sk_buff *skb, unsigned int len,
                         unsigned int mss_now, gfp_t gfp)
   {
         struct sk_buff *buff;
@@@ -1824,7 -1875,7 +1875,7 @@@
   
         /* All of a TSO frame must be composed of paged data.  */
         if (skb->len != skb->data_len)
-               return tcp_fragment(sk, skb, len, mss_now, gfp);
+               return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
   
         buff = sk_stream_alloc_skb(sk, 0, gfp, true);
         if (unlikely(!buff))
@@@ -1860,7 -1911,7 +1911,7 @@@
   
         /* Link BUFF into the send queue. */
         __skb_header_release(buff);
-       tcp_insert_write_queue_after(skb, buff, sk);
+       tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
   
         return 0;
   }
@@@ -1910,7 -1961,7 +1961,7 @@@ static bool tcp_tso_should_defer(struc
         if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
                 goto send_now;
   
-       win_divisor = READ_ONCE(sysctl_tcp_tso_win_divisor);
- -      win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
++      win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
         if (win_divisor) {
                 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
   
@@@ -1930,8 -1981,10 +1981,10 @@@
                         goto send_now;
         }
   
-       head = tcp_write_queue_head(sk);
- 
+       /* TODO : use tsorted_sent_queue ? */
+       head = tcp_rtx_queue_head(sk);
+       if (!head)
+               goto send_now;
         age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
         /* If next ACK is likely to come too late (half srtt), do not defer */
         if (age < (tp->srtt_us >> 4))
@@@ -2145,18 -2198,18 +2198,18 @@@ static bool tcp_small_queue_check(struc
   {
         unsigned int limit;
   
-       limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
-       limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+       limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
+       limit = min_t(u32, limit,
+                     sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
         limit <<= factor;
   
         if (refcount_read(&sk->sk_wmem_alloc) > limit) {
-               /* Always send the 1st or 2nd skb in write queue.
+               /* Always send skb if rtx queue is empty.
                  * No need to wait for TX completion to call us back,
                  * after softirq/tasklet schedule.
                  * This helps when TX completions are delayed too much.
                  */
-               if (skb == sk->sk_write_queue.next ||
-                   skb->prev == sk->sk_write_queue.next)
+               if (tcp_rtx_queue_empty(sk))
                         return false;
   
                 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@@ -2207,7 -2260,7 +2260,7 @@@ void tcp_chrono_stop(struct sock *sk, c
          * it's the "most interesting" or current chrono we are
          * tracking and starts busy chrono if we have pending data.
          */
-       if (tcp_write_queue_empty(sk))
+       if (tcp_rtx_and_write_queues_empty(sk))
                 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
         else if (type == tp->chrono_type)
                 tcp_chrono_set(tp, TCP_CHRONO_BUSY);
@@@ -2263,7 -2316,7 +2316,7 @@@ static bool tcp_write_xmit(struct sock 
   
                 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
                         /* "skb_mstamp" is used as a start point for the retransmit timer */
-                       skb->skb_mstamp = tp->tcp_mstamp;
+                       tcp_update_skb_after_send(tp, skb);
                         goto repair; /* Skip network transmission */
                 }
   
@@@ -2302,7 -2355,8 +2355,8 @@@
                                                     nonagle);
   
                 if (skb->len > limit &&
-                   unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+                   unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+                                         skb, limit, mss_now, gfp)))
                         break;
   
                 if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@@ -2342,7 -2396,7 +2396,7 @@@ repair
                 tcp_cwnd_validate(sk, is_cwnd_limited);
                 return false;
         }
-       return !tp->packets_out && tcp_send_head(sk);
+       return !tp->packets_out && !tcp_write_queue_empty(sk);
   }
   
   bool tcp_schedule_loss_probe(struct sock *sk)
@@@ -2350,6 -2404,7 +2404,7 @@@
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         u32 timeout, rto_delta_us;
+       int early_retrans;
   
         /* Don't do any loss probe on a Fast Open connection before 3WHS
          * finishes.
@@@ -2357,16 -2412,17 +2412,17 @@@
         if (tp->fastopen_rsk)
                 return false;
   
+       early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
         /* Schedule a loss probe in 2*RTT for SACK capable connections
          * in Open state, that are either limited by cwnd or application.
          */
-       if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+       if ((early_retrans != 3 && early_retrans != 4) ||
             !tp->packets_out || !tcp_is_sack(tp) ||
             icsk->icsk_ca_state != TCP_CA_Open)
                 return false;
   
         if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
-            tcp_send_head(sk))
+            !tcp_write_queue_empty(sk))
                 return false;
   
         /* Probe timeout is 2*rtt. Add minimum RTO to account
@@@ -2419,18 -2475,14 +2475,14 @@@ void tcp_send_loss_probe(struct sock *s
         int mss = tcp_current_mss(sk);
   
         skb = tcp_send_head(sk);
-       if (skb) {
-               if (tcp_snd_wnd_test(tp, skb, mss)) {
-                       pcount = tp->packets_out;
-                       tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
-                       if (tp->packets_out > pcount)
-                               goto probe_sent;
-                       goto rearm_timer;
-               }
-               skb = tcp_write_queue_prev(sk, skb);
-       } else {
-               skb = tcp_write_queue_tail(sk);
+       if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+               pcount = tp->packets_out;
+               tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+               if (tp->packets_out > pcount)
+                       goto probe_sent;
+               goto rearm_timer;
         }
+       skb = skb_rb_last(&sk->tcp_rtx_queue);
   
         /* At most one outstanding TLP retransmission. */
         if (tp->tlp_high_seq)
@@@ -2448,10 -2500,11 +2500,11 @@@
                 goto rearm_timer;
   
         if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
-               if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+               if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+                                         (pcount - 1) * mss, mss,
                                           GFP_ATOMIC)))
                         goto rearm_timer;
-               skb = tcp_write_queue_next(sk, skb);
+               skb = skb_rb_next(skb);
         }
   
         if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@@ -2651,7 -2704,7 +2704,7 @@@ void tcp_skb_collapse_tstamp(struct sk_
   static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
   {
         struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+       struct sk_buff *next_skb = skb_rb_next(skb);
         int skb_size, next_skb_size;
   
         skb_size = skb->len;
@@@ -2668,8 -2721,6 +2721,6 @@@
         }
         tcp_highest_sack_replace(sk, next_skb, skb);
   
-       tcp_unlink_write_queue(next_skb, sk);
- 
         if (next_skb->ip_summed == CHECKSUM_PARTIAL)
                 skb->ip_summed = CHECKSUM_PARTIAL;
   
@@@ -2697,7 -2748,7 +2748,7 @@@
   
         tcp_skb_collapse_tstamp(skb, next_skb);
   
-       sk_wmem_free_skb(sk, next_skb);
+       tcp_rtx_queue_unlink_and_free(next_skb, sk);
         return true;
   }
   
@@@ -2708,8 -2759,6 +2759,6 @@@ static bool tcp_can_collapse(const stru
                 return false;
         if (skb_cloned(skb))
                 return false;
-       if (skb == tcp_send_head(sk))
-               return false;
         /* Some heuristics for collapsing over SACK'd could be invented */
         if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
                 return false;
@@@ -2727,12 -2776,12 +2776,12 @@@ static void tcp_retrans_try_collapse(st
         struct sk_buff *skb = to, *tmp;
         bool first = true;
   
-       if (!sysctl_tcp_retrans_collapse)
+       if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
                 return;
         if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
                 return;
   
-       tcp_for_write_queue_from_safe(skb, tmp, sk) {
+       skb_rbtree_walk_from_safe(skb, tmp) {
                 if (!tcp_can_collapse(sk, skb))
                         break;
   
@@@ -2807,7 -2856,8 +2856,8 @@@ int __tcp_retransmit_skb(struct sock *s
   
         len = cur_mss * segs;
         if (skb->len > len) {
-               if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+               if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+                                cur_mss, GFP_ATOMIC))
                         return -ENOMEM; /* We'll try again later. */
         } else {
                 if (skb_unclone(skb, GFP_ATOMIC))
@@@ -2841,11 -2891,14 +2891,14 @@@
                      skb_headroom(skb) >= 0xFFFF)) {
                 struct sk_buff *nskb;
   
-               nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
-               err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-                            -ENOBUFS;
+               tcp_skb_tsorted_save(skb) {
+                       nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+                       err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+                                    -ENOBUFS;
+               } tcp_skb_tsorted_restore(skb);
+ 
                 if (!err) {
-                       skb->skb_mstamp = tp->tcp_mstamp;
+                       tcp_update_skb_after_send(tp, skb);
                         tcp_rate_skb_sent(sk, skb);
                 }
         } else {
@@@ -2854,6 -2907,7 +2907,7 @@@
   
         if (likely(!err)) {
                 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+               trace_tcp_retransmit_skb(sk, skb);
         } else if (err != -EBUSY) {
                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
         }
@@@ -2890,36 -2944,25 +2944,25 @@@ int tcp_retransmit_skb(struct sock *sk
    * retransmitted data is acknowledged.  It tries to continue
    * resending the rest of the retransmit queue, until either
    * we've sent it all or the congestion window limit is reached.
-  * If doing SACK, the first ACK which comes back for a timeout
-  * based retransmit packet might feed us FACK information again.
-  * If so, we use it to avoid unnecessarily retransmissions.
    */
   void tcp_xmit_retransmit_queue(struct sock *sk)
   {
         const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct sk_buff *skb, *rtx_head, *hole = NULL;
         struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *skb;
-       struct sk_buff *hole = NULL;
         u32 max_segs;
         int mib_idx;
   
         if (!tp->packets_out)
                 return;
   
-       if (tp->retransmit_skb_hint) {
-               skb = tp->retransmit_skb_hint;
-       } else {
-               skb = tcp_write_queue_head(sk);
-       }
- 
+       rtx_head = tcp_rtx_queue_head(sk);
+       skb = tp->retransmit_skb_hint ?: rtx_head;
         max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
-       tcp_for_write_queue_from(skb, sk) {
+       skb_rbtree_walk_from(skb) {
                 __u8 sacked;
                 int segs;
   
-               if (skb == tcp_send_head(sk))
-                       break;
- 
                 if (tcp_pacing_check(sk))
                         break;
   
@@@ -2964,7 -3007,7 +3007,7 @@@
                 if (tcp_in_cwnd_reduction(sk))
                         tp->prr_out += tcp_skb_pcount(skb);
   
-               if (skb == tcp_write_queue_head(sk) &&
+               if (skb == rtx_head &&
                     icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                                   inet_csk(sk)->icsk_rto,
@@@ -3006,12 -3049,15 +3049,15 @@@ void tcp_send_fin(struct sock *sk
          * Note: in the latter case, FIN packet will be sent after a timeout,
          * as TCP stack thinks it has already been transmitted.
          */
-       if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+       if (!tskb && tcp_under_memory_pressure(sk))
+               tskb = skb_rb_last(&sk->tcp_rtx_queue);
+ 
+       if (tskb) {
   coalesce:
                 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
                 TCP_SKB_CB(tskb)->end_seq++;
                 tp->write_seq++;
-               if (!tcp_send_head(sk)) {
+               if (tcp_write_queue_empty(sk)) {
                         /* This means tskb was already sent.
                          * Pretend we included the FIN on previous transmit.
                          * We need to set tp->snd_nxt to the value it would have
@@@ -3028,6 -3074,7 +3074,7 @@@
                                 goto coalesce;
                         return;
                 }
+               INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
                 skb_reserve(skb, MAX_TCP_HEADER);
                 sk_forced_mem_schedule(sk, skb->truesize);
                 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
@@@ -3064,6 -3111,11 +3111,11 @@@ void tcp_send_active_reset(struct sock 
         /* Send it off. */
         if (tcp_transmit_skb(sk, skb, 0, priority))
                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
+ 
+       /* skb of trace_tcp_send_reset() keeps the skb that caused RST,
+        * skb here is different to the troublesome skb, so use NULL
+        */
+       trace_tcp_send_reset(sk, NULL);
   }
   
   /* Send a crossed SYN-ACK during socket establishment.
@@@ -3076,20 -3128,24 +3128,24 @@@ int tcp_send_synack(struct sock *sk
   {
         struct sk_buff *skb;
   
-       skb = tcp_write_queue_head(sk);
+       skb = tcp_rtx_queue_head(sk);
         if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
-               pr_debug("%s: wrong queue state\n", __func__);
+               pr_err("%s: wrong queue state\n", __func__);
                 return -EFAULT;
         }
         if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
                 if (skb_cloned(skb)) {
-                       struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+                       struct sk_buff *nskb;
+ 
+                       tcp_skb_tsorted_save(skb) {
+                               nskb = skb_copy(skb, GFP_ATOMIC);
+                       } tcp_skb_tsorted_restore(skb);
                         if (!nskb)
                                 return -ENOMEM;
-                       tcp_unlink_write_queue(skb, sk);
+                       INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
+                       tcp_rtx_queue_unlink_and_free(skb, sk);
                         __skb_header_release(nskb);
-                       __tcp_add_write_queue_head(sk, nskb);
-                       sk_wmem_free_skb(sk, skb);
+                       tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
                         sk->sk_wmem_queued += nskb->truesize;
                         sk_mem_charge(sk, nskb->truesize);
                         skb = nskb;
@@@ -3166,8 -3222,8 +3222,8 @@@ struct sk_buff *tcp_make_synack(const s
         md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
   #endif
         skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
-       tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
-                         sizeof(*th);
+       tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+                                            foc) + sizeof(*th);
   
         skb_push(skb, tcp_header_size);
         skb_reset_transport_header(skb);
@@@ -3268,7 -3324,7 +3324,7 @@@ static void tcp_connect_init(struct soc
         if (rcv_wnd == 0)
                 rcv_wnd = dst_metric(dst, RTAX_INITRWND);
   
-       tcp_select_initial_window(tcp_full_space(sk),
+       tcp_select_initial_window(sk, tcp_full_space(sk),
                                   tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
                                   &tp->rcv_wnd,
                                   &tp->window_clamp,
@@@ -3307,7 -3363,6 +3363,6 @@@ static void tcp_connect_queue_skb(struc
   
         tcb->end_seq += skb->len;
         __skb_header_release(skb);
-       __tcp_add_write_queue_tail(sk, skb);
         sk->sk_wmem_queued += skb->truesize;
         sk_mem_charge(sk, skb->truesize);
         tp->write_seq = tcb->end_seq;
@@@ -3355,6 -3410,7 +3410,7 @@@ static int tcp_send_syn_data(struct soc
                 int copied = copy_from_iter(skb_put(syn_data, space), space,
                                             &fo->data->msg_iter);
                 if (unlikely(!copied)) {
+                       tcp_skb_tsorted_anchor_cleanup(syn_data);
                         kfree_skb(syn_data);
                         goto fallback;
                 }
@@@ -3385,12 -3441,13 +3441,13 @@@
         TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
         if (!err) {
                 tp->syn_data = (fo->copied > 0);
+               tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
                 goto done;
         }
   
-       /* data was not sent, this is our new send_head */
-       sk->sk_send_head = syn_data;
+       /* data was not sent, put it in write_queue */
+       __skb_queue_tail(&sk->sk_write_queue, syn_data);
         tp->packets_out -= tcp_skb_pcount(syn_data);
   
   fallback:
@@@ -3433,6 -3490,7 +3490,7 @@@ int tcp_connect(struct sock *sk
         tp->retrans_stamp = tcp_time_stamp(tp);
         tcp_connect_queue_skb(sk, buff);
         tcp_ecn_send_syn(sk, buff);
+       tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
   
         /* Send off SYN; include data in Fast Open. */
         err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@@ -3627,7 -3685,8 +3685,8 @@@ int tcp_write_wakeup(struct sock *sk, i
                     skb->len > mss) {
                         seg_size = min(seg_size, mss);
                         TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-                       if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+                       if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+                                        skb, seg_size, mss, GFP_ATOMIC))
                                 return -1;
                 } else if (!tcp_skb_pcount(skb))
                         tcp_set_skb_tso_segs(skb, mss);
@@@ -3657,7 -3716,7 +3716,7 @@@ void tcp_send_probe0(struct sock *sk
   
         err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
   
-       if (tp->packets_out || !tcp_send_head(sk)) {
+       if (tp->packets_out || tcp_write_queue_empty(sk)) {
                 /* Cancel probe timer, if it is not required. */
                 icsk->icsk_probes_out = 0;
                 icsk->icsk_backoff = 0;
@@@ -3698,6 -3757,7 +3757,7 @@@ int tcp_rtx_synack(const struct sock *s
                 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
                 if (unlikely(tcp_passive_fastopen(sk)))
                         tcp_sk(sk)->total_retrans++;
+               trace_tcp_retransmit_synack(sk, req);
         }
         return res;
   }
diff --combined net/ipv4/udp.c

index 02ec9a3493033cf044b31724c340ce0cfa9add20,a6699af0553968e41d69b4201459e696b050badc..e4ff25c947c5e5b21ac4986d0327339f4f60d321
--- 1/net/ipv4/udp.c
--- 2/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@@ -1209,8 -1209,7 +1209,7 @@@ static void udp_rmem_release(struct soc
         if (likely(partial)) {
                 up->forward_deficit += size;
                 size = up->forward_deficit;
-               if (size < (sk->sk_rcvbuf >> 2) &&
-                   !skb_queue_empty(&up->reader_queue))
+               if (size < (sk->sk_rcvbuf >> 2))
                         return;
         } else {
                 size += up->forward_deficit;
@@@ -1853,7 -1852,7 +1852,7 @@@ static int udp_queue_rcv_skb(struct soc
                  */
   
                 /* if we're overly short, let UDP handle it */
- -              encap_rcv = ACCESS_ONCE(up->encap_rcv);
+ +              encap_rcv = READ_ONCE(up->encap_rcv);
                 if (encap_rcv) {
                         int ret;
   
@@@ -2298,7 -2297,7 +2297,7 @@@ void udp_destroy_sock(struct sock *sk
         unlock_sock_fast(sk, slow);
         if (static_key_false(&udp_encap_needed) && up->encap_type) {
                 void (*encap_destroy)(struct sock *sk);
- -              encap_destroy = ACCESS_ONCE(up->encap_destroy);
+ +              encap_destroy = READ_ONCE(up->encap_destroy);
                 if (encap_destroy)
                         encap_destroy(sk);
         }
diff --combined net/ipv6/ah6.c

index 3bd9d806b506f603fcf546d054b6316b6c3ac35c,37bb33fbc742542dd9b99b7189188cfa7bf12048..78c974391567683ef08799d5c99dc217ff2789f9
--- 1/net/ipv6/ah6.c
--- 2/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@@ -271,6 -271,7 +271,7 @@@ static int ipv6_clear_mutable_options(s
                 case NEXTHDR_DEST:
                         if (dir == XFRM_POLICY_OUT)
                                 ipv6_rearrange_destopt(iph, exthdr.opth);
+                       /* fall through */
                 case NEXTHDR_HOP:
                         if (!zero_out_mutable_opts(exthdr.opth)) {
                                 net_dbg_ratelimited("overrun %sopts\n",
@@@ -443,7 -444,7 +444,7 @@@ static int ah6_output(struct xfrm_stat
                 if (err == -EINPROGRESS)
                         goto out;
   
- -              if (err == -EBUSY)
+ +              if (err == -ENOSPC)
                         err = NET_XMIT_DROP;
                 goto out_free;
         }
diff --combined net/ipv6/esp6.c

index c04d995df37c36f5ca2432de7392c66c52360879,4000b71bfdc5757c554e8964722417cad1a729e1..a902ff8f59be3ed7e1f28afc234a0e56eca4e684
--- 1/net/ipv6/esp6.c
--- 2/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@@ -396,7 -396,7 +396,7 @@@ int esp6_output_tail(struct xfrm_state 
         case -EINPROGRESS:
                 goto error;
   
- -      case -EBUSY:
+ +      case -ENOSPC:
                 err = NET_XMIT_DROP;
                 break;
   
@@@ -483,8 -483,8 +483,8 @@@ static inline int esp_remove_trailer(st
                 goto out;
         }
   
-       if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
-               BUG();
+       ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
+       BUG_ON(ret);
   
         ret = -EINVAL;
         padlen = nexthdr[0];
@@@ -559,14 -559,14 +559,14 @@@ static void esp_input_restore_header(st
   static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
   {
         struct xfrm_state *x = xfrm_input_state(skb);
-       struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
   
         /* For ESN we move the header forward by 4 bytes to
          * accomodate the high bits.  We will move it back after
          * decryption.
          */
         if ((x->props.flags & XFRM_STATE_ESN)) {
-               esph = skb_push(skb, 4);
+               struct ip_esp_hdr *esph = skb_push(skb, 4);
+ 
                 *seqhi = esph->spi;
                 esph->spi = esph->seq_no;
                 esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --combined net/ipv6/ip6_tunnel.c

index dab94655415741873e869176d2f06520085d8729,00882fdb12239fe719e1f9669512a9ce3edc0d90..3d3092adf1d2d5962b5fc87bdf08419762d1b1ee
--- 1/net/ipv6/ip6_tunnel.c
--- 2/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@@ -471,15 -471,16 +471,16 @@@ static in
   ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
             u8 *type, u8 *code, int *msg, __u32 *info, int offset)
   {
-       const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
-       struct ip6_tnl *t;
-       int rel_msg = 0;
+       const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+       struct net *net = dev_net(skb->dev);
         u8 rel_type = ICMPV6_DEST_UNREACH;
         u8 rel_code = ICMPV6_ADDR_UNREACH;
-       u8 tproto;
         __u32 rel_info = 0;
-       __u16 len;
+       struct ip6_tnl *t;
         int err = -ENOENT;
+       int rel_msg = 0;
+       u8 tproto;
+       __u16 len;
   
         /* If the packet doesn't contain the original IPv6 header we are
            in trouble since we might need the source address for further
@@@ -490,16 -491,15 +491,15 @@@
         if (!t)
                 goto out;
   
- -      tproto = ACCESS_ONCE(t->parms.proto);
+ +      tproto = READ_ONCE(t->parms.proto);
         if (tproto != ipproto && tproto != 0)
                 goto out;
   
         err = 0;
   
         switch (*type) {
-               __u32 teli;
                 struct ipv6_tlv_tnl_enc_lim *tel;
-               __u32 mtu;
+               __u32 mtu, teli;
         case ICMPV6_DEST_UNREACH:
                 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
                                     t->parms.name);
@@@ -530,11 -530,11 +530,11 @@@
                 }
                 break;
         case ICMPV6_PKT_TOOBIG:
+               ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
+                               sock_net_uid(net, NULL));
                 mtu = *info - offset;
                 if (mtu < IPV6_MIN_MTU)
                         mtu = IPV6_MIN_MTU;
-               t->dev->mtu = mtu;
- 
                 len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
                 if (len > mtu) {
                         rel_type = ICMPV6_PKT_TOOBIG;
@@@ -543,6 -543,10 +543,10 @@@
                         rel_msg = 1;
                 }
                 break;
+       case NDISC_REDIRECT:
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
+               break;
         }
   
         *type = rel_type;
@@@ -559,13 -563,12 +563,12 @@@ static in
   ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
            u8 type, u8 code, int offset, __be32 info)
   {
-       int rel_msg = 0;
-       u8 rel_type = type;
-       u8 rel_code = code;
         __u32 rel_info = ntohl(info);
-       int err;
-       struct sk_buff *skb2;
         const struct iphdr *eiph;
+       struct sk_buff *skb2;
+       int err, rel_msg = 0;
+       u8 rel_type = type;
+       u8 rel_code = code;
         struct rtable *rt;
         struct flowi4 fl4;
   
@@@ -590,9 -593,6 +593,6 @@@
                 rel_type = ICMP_DEST_UNREACH;
                 rel_code = ICMP_FRAG_NEEDED;
                 break;
-       case NDISC_REDIRECT:
-               rel_type = ICMP_REDIRECT;
-               rel_code = ICMP_REDIR_HOST;
         default:
                 return 0;
         }
@@@ -611,33 -611,26 +611,26 @@@
         eiph = ip_hdr(skb2);
   
         /* Try to guess incoming interface */
-       rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
-                                  eiph->saddr, 0,
-                                  0, 0,
-                                  IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+       rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
+                                  0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
         if (IS_ERR(rt))
                 goto out;
   
         skb2->dev = rt->dst.dev;
+       ip_rt_put(rt);
   
         /* route "incoming" packet */
         if (rt->rt_flags & RTCF_LOCAL) {
-               ip_rt_put(rt);
-               rt = NULL;
                 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
-                                          eiph->daddr, eiph->saddr,
-                                          0, 0,
-                                          IPPROTO_IPIP,
-                                          RT_TOS(eiph->tos), 0);
-               if (IS_ERR(rt) ||
-                   rt->dst.dev->type != ARPHRD_TUNNEL) {
+                                          eiph->daddr, eiph->saddr, 0, 0,
+                                          IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+               if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
                         if (!IS_ERR(rt))
                                 ip_rt_put(rt);
                         goto out;
                 }
                 skb_dst_set(skb2, &rt->dst);
         } else {
-               ip_rt_put(rt);
                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
                                    skb2->dev) ||
                     skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
@@@ -649,10 -642,9 +642,9 @@@
                 if (rel_info > dst_mtu(skb_dst(skb2)))
                         goto out;
   
-               skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
+               skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
+                                               rel_info);
         }
-       if (rel_type == ICMP_REDIRECT)
-               skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
   
         icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
   
@@@ -665,11 -657,10 +657,10 @@@ static in
   ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
            u8 type, u8 code, int offset, __be32 info)
   {
-       int rel_msg = 0;
+       __u32 rel_info = ntohl(info);
+       int err, rel_msg = 0;
         u8 rel_type = type;
         u8 rel_code = code;
-       __u32 rel_info = ntohl(info);
-       int err;
   
         err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
                           &rel_msg, &rel_info, offset);
@@@ -769,7 -760,8 +760,8 @@@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t
   
                 if ((ipv6_addr_is_multicast(laddr) ||
                      likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
-                   likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
+                   ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
+                    likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
                         ret = 1;
         }
         return ret;
@@@ -899,7 -891,7 +891,7 @@@ static int ipxip6_rcv(struct sk_buff *s
         t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
   
         if (t) {
- -              u8 tproto = ACCESS_ONCE(t->parms.proto);
+ +              u8 tproto = READ_ONCE(t->parms.proto);
   
                 if (tproto != ipproto && tproto != 0)
                         goto drop;
@@@ -999,7 -991,8 +991,8 @@@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t
                 if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
                         pr_warn("%s xmit: Local address not yet configured!\n",
                                 p->name);
-               else if (!ipv6_addr_is_multicast(raddr) &&
+               else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+                        !ipv6_addr_is_multicast(raddr) &&
                          unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
                         pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
                                 p->name);
@@@ -1233,7 -1226,7 +1226,7 @@@ ip4ip6_tnl_xmit(struct sk_buff *skb, st
   
         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
   
- -      tproto = ACCESS_ONCE(t->parms.proto);
+ +      tproto = READ_ONCE(t->parms.proto);
         if (tproto != IPPROTO_IPIP && tproto != 0)
                 return -1;
   
@@@ -1303,7 -1296,7 +1296,7 @@@ ip6ip6_tnl_xmit(struct sk_buff *skb, st
         u8 tproto;
         int err;
   
- -      tproto = ACCESS_ONCE(t->parms.proto);
+ +      tproto = READ_ONCE(t->parms.proto);
         if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
             ip6_tnl_addr_conflict(t, ipv6h))
                 return -1;
@@@ -2168,17 -2161,16 +2161,16 @@@ static struct xfrm6_tunnel ip6ip6_handl
         .priority       =       1,
   };
   
- static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
+ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
   {
         struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
         struct net_device *dev, *aux;
         int h;
         struct ip6_tnl *t;
-       LIST_HEAD(list);
   
         for_each_netdev_safe(net, dev, aux)
                 if (dev->rtnl_link_ops == &ip6_link_ops)
-                       unregister_netdevice_queue(dev, &list);
+                       unregister_netdevice_queue(dev, list);
   
         for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
                 t = rtnl_dereference(ip6n->tnls_r_l[h]);
@@@ -2187,12 -2179,10 +2179,10 @@@
                          * been added to the list by the previous loop.
                          */
                         if (!net_eq(dev_net(t->dev), net))
-                               unregister_netdevice_queue(t->dev, &list);
+                               unregister_netdevice_queue(t->dev, list);
                         t = rtnl_dereference(t->next);
                 }
         }
- 
-       unregister_netdevice_many(&list);
   }
   
   static int __net_init ip6_tnl_init_net(struct net *net)
@@@ -2236,16 -2226,21 +2226,21 @@@ err_alloc_dev
         return err;
   }
   
- static void __net_exit ip6_tnl_exit_net(struct net *net)
+ static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
   {
+       struct net *net;
+       LIST_HEAD(list);
+ 
         rtnl_lock();
-       ip6_tnl_destroy_tunnels(net);
+       list_for_each_entry(net, net_list, exit_list)
+               ip6_tnl_destroy_tunnels(net, &list);
+       unregister_netdevice_many(&list);
         rtnl_unlock();
   }
   
   static struct pernet_operations ip6_tnl_net_ops = {
         .init = ip6_tnl_init_net,
-       .exit = ip6_tnl_exit_net,
+       .exit_batch = ip6_tnl_exit_batch_net,
         .id   = &ip6_tnl_net_id,
         .size = sizeof(struct ip6_tnl_net),
   };
diff --combined net/mac80211/sta_info.c

index 214d2ba02877d2fcb45980786528a4e650c9644d,9673e157bf8fd5be8f85277dfd41c0b46d634e17..a3060e55122c666eb3eedb6c8c93714e0783cab8
--- 1/net/mac80211/sta_info.c
--- 2/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@@ -329,10 -329,12 +329,12 @@@ struct sta_info *sta_info_alloc(struct 
                 sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
                 if (!sta->mesh)
                         goto free;
+               sta->mesh->plink_sta = sta;
                 spin_lock_init(&sta->mesh->plink_lock);
                 if (ieee80211_vif_is_mesh(&sdata->vif) &&
                     !sdata->u.mesh.user_mpm)
-                       init_timer(&sta->mesh->plink_timer);
+                       timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
+                                   0);
                 sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
         }
   #endif
@@@ -515,6 -517,31 +517,31 @@@ static int sta_info_insert_drv_state(st
         return err;
   }
   
+ static void
+ ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
+ {
+       struct ieee80211_local *local = sdata->local;
+       bool allow_p2p_go_ps = sdata->vif.p2p;
+       struct sta_info *sta;
+ 
+       rcu_read_lock();
+       list_for_each_entry_rcu(sta, &local->sta_list, list) {
+               if (sdata != sta->sdata ||
+                   !test_sta_flag(sta, WLAN_STA_ASSOC))
+                       continue;
+               if (!sta->sta.support_p2p_ps) {
+                       allow_p2p_go_ps = false;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+ 
+       if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
+               sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
+               ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
+       }
+ }
+ 
   /*
    * should be called with sta_mtx locked
    * this function replaces the mutex lock
@@@ -561,6 -588,13 +588,13 @@@ static int sta_info_insert_finish(struc
                 goto out_remove;
   
         set_sta_flag(sta, WLAN_STA_INSERTED);
+ 
+       if (sta->sta_state >= IEEE80211_STA_ASSOC) {
+               ieee80211_recalc_min_chandef(sta->sdata);
+               if (!sta->sta.support_p2p_ps)
+                       ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+       }
+ 
         /* accept BA sessions now */
         clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
   
@@@ -1788,31 -1822,6 +1822,6 @@@ void ieee80211_sta_set_buffered(struct 
   }
   EXPORT_SYMBOL(ieee80211_sta_set_buffered);
   
- static void
- ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
- {
-       struct ieee80211_local *local = sdata->local;
-       bool allow_p2p_go_ps = sdata->vif.p2p;
-       struct sta_info *sta;
- 
-       rcu_read_lock();
-       list_for_each_entry_rcu(sta, &local->sta_list, list) {
-               if (sdata != sta->sdata ||
-                   !test_sta_flag(sta, WLAN_STA_ASSOC))
-                       continue;
-               if (!sta->sta.support_p2p_ps) {
-                       allow_p2p_go_ps = false;
-                       break;
-               }
-       }
-       rcu_read_unlock();
- 
-       if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
-               sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
-               ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
-       }
- }
- 
   int sta_info_move_state(struct sta_info *sta,
                         enum ieee80211_sta_state new_state)
   {
@@@ -2008,7 -2017,7 +2017,7 @@@ static void sta_stats_decode_rate(struc
   
   static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
   {
- -      u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+ +      u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
   
         if (rate == STA_STATS_RATE_INVALID)
                 return -EINVAL;
diff --combined net/netfilter/ipvs/ip_vs_conn.c

index 3a43b3470331bccc4b83a72e593ae3a3ac0c9f18,f73561ca982d01750a88b8b82dc4c732de835d92..3e053cb300709cfb09b93364b33da69c5f9dff2a
--- 1/net/netfilter/ipvs/ip_vs_conn.c
--- 2/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@@ -104,7 -104,7 +104,7 @@@ static inline void ct_write_unlock_bh(u
         spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
   }
   
- -static void ip_vs_conn_expire(unsigned long data);
+ +static void ip_vs_conn_expire(struct timer_list *t);
   
   /*
    *    Returns hash value for IPVS connection entry
@@@ -185,7 -185,7 +185,7 @@@ static inline int ip_vs_conn_hash(struc
                 hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
                 ret = 1;
         } else {
-               pr_err("%s(): request for already hashed, called from %pF\n",
+               pr_err("%s(): request for already hashed, called from %pS\n",
                        __func__, __builtin_return_address(0));
                 ret = 0;
         }
@@@ -457,7 -457,7 +457,7 @@@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_pr
   static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
   {
         __ip_vs_conn_put(cp);
- -      ip_vs_conn_expire((unsigned long)cp);
+ +      ip_vs_conn_expire(&cp->timer);
   }
   
   /*
@@@ -817,9 -817,9 +817,9 @@@ static void ip_vs_conn_rcu_free(struct 
         kmem_cache_free(ip_vs_conn_cachep, cp);
   }
   
- -static void ip_vs_conn_expire(unsigned long data)
+ +static void ip_vs_conn_expire(struct timer_list *t)
   {
- -      struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+ +      struct ip_vs_conn *cp = from_timer(cp, t, timer);
         struct netns_ipvs *ipvs = cp->ipvs;
   
         /*
@@@ -909,7 -909,7 +909,7 @@@ ip_vs_conn_new(const struct ip_vs_conn_
         }
   
         INIT_HLIST_NODE(&cp->c_list);
- -      setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+ +      timer_setup(&cp->timer, ip_vs_conn_expire, 0);
         cp->ipvs           = ipvs;
         cp->af             = p->af;
         cp->daf            = dest_af;
diff --combined net/netfilter/ipvs/ip_vs_ctl.c

index b47e266c6eca88d98df1e67efb28e1923f9497a5,fac8c802b4eaf0605403c1ebf48ac82ef41775c6..fff213eacf2aeda24f15d07eac1b1d4f64df1e34
--- 1/net/netfilter/ipvs/ip_vs_ctl.c
--- 2/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@@ -300,7 -300,7 +300,7 @@@ static int ip_vs_svc_hash(struct ip_vs_
         unsigned int hash;
   
         if (svc->flags & IP_VS_SVC_F_HASHED) {
-               pr_err("%s(): request for already hashed, called from %pF\n",
+               pr_err("%s(): request for already hashed, called from %pS\n",
                        __func__, __builtin_return_address(0));
                 return 0;
         }
@@@ -334,7 -334,7 +334,7 @@@
   static int ip_vs_svc_unhash(struct ip_vs_service *svc)
   {
         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
-               pr_err("%s(): request for unhash flagged, called from %pF\n",
+               pr_err("%s(): request for unhash flagged, called from %pS\n",
                        __func__, __builtin_return_address(0));
                 return 0;
         }
@@@ -1146,9 -1146,9 +1146,9 @@@ ip_vs_del_dest(struct ip_vs_service *sv
         return 0;
   }
   
- -static void ip_vs_dest_trash_expire(unsigned long data)
+ +static void ip_vs_dest_trash_expire(struct timer_list *t)
   {
- -      struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
+ +      struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
         struct ip_vs_dest *dest, *next;
         unsigned long now = jiffies;
   
@@@ -2034,12 -2034,16 +2034,16 @@@ static int ip_vs_info_seq_show(struct s
                 seq_puts(seq,
                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
         } else {
+               struct net *net = seq_file_net(seq);
+               struct netns_ipvs *ipvs = net_ipvs(net);
                 const struct ip_vs_service *svc = v;
                 const struct ip_vs_iter *iter = seq->private;
                 const struct ip_vs_dest *dest;
                 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
                 char *sched_name = sched ? sched->name : "none";
   
+               if (svc->ipvs != ipvs)
+                       return 0;
                 if (iter->table == ip_vs_svc_table) {
   #ifdef CONFIG_IP_VS_IPV6
                         if (svc->af == AF_INET6)
@@@ -4019,7 -4023,8 +4023,7 @@@ int __net_init ip_vs_control_net_init(s
   
         INIT_LIST_HEAD(&ipvs->dest_trash);
         spin_lock_init(&ipvs->dest_trash_lock);
- -      setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
- -                  (unsigned long) ipvs);
+ +      timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
         atomic_set(&ipvs->ftpsvc_counter, 0);
         atomic_set(&ipvs->nullsvc_counter, 0);
         atomic_set(&ipvs->conn_out_counter, 0);
diff --combined net/wireless/nl80211.c

index eb866647a27ac3847f2a6eca3709945808dd70e6,fce2cbe6a19390f7059137ba140296c6d4108d21..bb16f1ec766ead1e65fb6e4196a4278ff09a67a3
--- 1/net/wireless/nl80211.c
--- 2/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@@ -2130,6 -2130,15 +2130,15 @@@ static int nl80211_parse_chandef(struc
                 case NL80211_CHAN_HT40MINUS:
                         cfg80211_chandef_create(chandef, chandef->chan,
                                                 chantype);
+                       /* user input for center_freq is incorrect */
+                       if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
+                           chandef->center_freq1 != nla_get_u32(
+                                       info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+                               return -EINVAL;
+                       /* center_freq2 must be zero */
+                       if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
+                           nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+                               return -EINVAL;
                         break;
                 default:
                         return -EINVAL;
@@@ -5677,6 -5686,11 +5686,11 @@@ static int nl80211_req_set_reg(struct s
         }
   }
   
+ static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
+ {
+       return reg_reload_regdb();
+ }
+ 
   static int nl80211_get_mesh_config(struct sk_buff *skb,
                                    struct genl_info *info)
   {
@@@ -6618,6 -6632,77 +6632,77 @@@ static bool cfg80211_off_channel_oper_a
         return regulatory_pre_cac_allowed(wdev->wiphy);
   }
   
+ static int
+ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
+                        void *request, struct nlattr **attrs,
+                        bool is_sched_scan)
+ {
+       u8 *mac_addr, *mac_addr_mask;
+       u32 *flags;
+       enum nl80211_feature_flags randomness_flag;
+ 
+       if (!attrs[NL80211_ATTR_SCAN_FLAGS])
+               return 0;
+ 
+       if (is_sched_scan) {
+               struct cfg80211_sched_scan_request *req = request;
+ 
+               randomness_flag = wdev ?
+                                 NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+                                 NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+               flags = &req->flags;
+               mac_addr = req->mac_addr;
+               mac_addr_mask = req->mac_addr_mask;
+       } else {
+               struct cfg80211_scan_request *req = request;
+ 
+               randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+               flags = &req->flags;
+               mac_addr = req->mac_addr;
+               mac_addr_mask = req->mac_addr_mask;
+       }
+ 
+       *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
+ 
+       if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+           !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+               return -EOPNOTSUPP;
+ 
+       if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+               int err;
+ 
+               if (!(wiphy->features & randomness_flag) ||
+                   (wdev && wdev->current_bss))
+                       return -EOPNOTSUPP;
+ 
+               err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
+               if (err)
+                       return err;
+       }
+ 
+       if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
+           !wiphy_ext_feature_isset(wiphy,
+                                    NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
+               return -EOPNOTSUPP;
+ 
+       if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
+          !wiphy_ext_feature_isset(wiphy,
+                                   NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
+               return -EOPNOTSUPP;
+ 
+       if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+           !wiphy_ext_feature_isset(wiphy,
+                                    NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
+               return -EOPNOTSUPP;
+ 
+       if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
+           !wiphy_ext_feature_isset(wiphy,
+                                    NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+               return -EOPNOTSUPP;
+ 
+       return 0;
+ }
+ 
   static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
   {
         struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@@ -6823,34 -6908,10 +6908,10 @@@
                         nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
         }
   
-       if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
-               request->flags = nla_get_u32(
-                       info->attrs[NL80211_ATTR_SCAN_FLAGS]);
-               if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-                   !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
-                       err = -EOPNOTSUPP;
-                       goto out_free;
-               }
- 
-               if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-                       if (!(wiphy->features &
-                                       NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
-                               err = -EOPNOTSUPP;
-                               goto out_free;
-                       }
- 
-                       if (wdev->current_bss) {
-                               err = -EOPNOTSUPP;
-                               goto out_free;
-                       }
- 
-                       err = nl80211_parse_random_mac(info->attrs,
-                                                      request->mac_addr,
-                                                      request->mac_addr_mask);
-                       if (err)
-                               goto out_free;
-               }
-       }
+       err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
+                                      false);
+       if (err)
+               goto out_free;
   
         request->no_cck =
                 nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
@@@ -7298,37 -7359,9 +7359,9 @@@ nl80211_parse_sched_scan(struct wiphy *
                        request->ie_len);
         }
   
-       if (attrs[NL80211_ATTR_SCAN_FLAGS]) {
-               request->flags = nla_get_u32(
-                       attrs[NL80211_ATTR_SCAN_FLAGS]);
-               if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-                   !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
-                       err = -EOPNOTSUPP;
-                       goto out_free;
-               }
- 
-               if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-                       u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
- 
-                       if (!wdev) /* must be net-detect */
-                               flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
- 
-                       if (!(wiphy->features & flg)) {
-                               err = -EOPNOTSUPP;
-                               goto out_free;
-                       }
- 
-                       if (wdev && wdev->current_bss) {
-                               err = -EOPNOTSUPP;
-                               goto out_free;
-                       }
- 
-                       err = nl80211_parse_random_mac(attrs, request->mac_addr,
-                                                      request->mac_addr_mask);
-                       if (err)
-                               goto out_free;
-               }
-       }
+       err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+       if (err)
+               goto out_free;
   
         if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
                 request->delay =
@@@ -8932,8 -8965,14 +8965,14 @@@ static int nl80211_connect(struct sk_bu
   
         if (info->attrs[NL80211_ATTR_USE_MFP]) {
                 connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+               if (connect.mfp == NL80211_MFP_OPTIONAL &&
+                   !wiphy_ext_feature_isset(&rdev->wiphy,
+                                            NL80211_EXT_FEATURE_MFP_OPTIONAL))
+                       return -EOPNOTSUPP;
+ 
                 if (connect.mfp != NL80211_MFP_REQUIRED &&
-                   connect.mfp != NL80211_MFP_NO)
+                   connect.mfp != NL80211_MFP_NO &&
+                   connect.mfp != NL80211_MFP_OPTIONAL)
                         return -EINVAL;
         } else {
                 connect.mfp = NL80211_MFP_NO;
@@@ -12684,6 -12723,12 +12723,12 @@@ static const struct genl_ops nl80211_op
                 .policy = nl80211_policy,
                 .flags = GENL_ADMIN_PERM,
         },
+       {
+               .cmd = NL80211_CMD_RELOAD_REGDB,
+               .doit = nl80211_reload_regdb,
+               .policy = nl80211_policy,
+               .flags = GENL_ADMIN_PERM,
+       },
         {
                 .cmd = NL80211_CMD_GET_MESH_CONFIG,
                 .doit = nl80211_get_mesh_config,
@@@ -13812,9 -13857,7 +13857,7 @@@ void nl80211_send_roamed(struct cfg8021
                      info->req_ie)) ||
             (info->resp_ie &&
              nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
-                    info->resp_ie)) ||
-           (info->authorized &&
-            nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
+                    info->resp_ie)))
                 goto nla_put_failure;
   
         genlmsg_end(msg, hdr);
@@@ -13823,6 -13866,36 +13866,36 @@@
                                 NL80211_MCGRP_MLME, gfp);
         return;
   
+  nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       nlmsg_free(msg);
+ }
+ 
+ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+                                 struct net_device *netdev, const u8 *bssid)
+ {
+       struct sk_buff *msg;
+       void *hdr;
+ 
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return;
+ 
+       hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
+       if (!hdr) {
+               nlmsg_free(msg);
+               return;
+       }
+ 
+       if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+               goto nla_put_failure;
+ 
+       genlmsg_end(msg, hdr);
+ 
+       genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+                               NL80211_MCGRP_MLME, GFP_KERNEL);
+       return;
+ 
    nla_put_failure:
         genlmsg_cancel(msg, hdr);
         nlmsg_free(msg);
@@@ -14201,7 -14274,7 +14274,7 @@@ static bool __nl80211_unexpected_frame(
         struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
         struct sk_buff *msg;
         void *hdr;
- -      u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
+ +      u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);
   
         if (!nlportid)
                 return false;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 15 Nov 2017 19:56:19 +0000 (11:56 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 15 Nov 2017 19:56:19 +0000 (11:56 -0800)
		1	2
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/atm/idt77105.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/atm/iphase.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/bonding/bond_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/chelsio/cxgb4/sge.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/i40e/i40e_debugfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/i40e/i40e_ethtool.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/i40e/i40e_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/igb/e1000_regs.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/igb/igb_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/mellanox/mlx4/en_tx.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/neterion/vxge/vxge-main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/ef10.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/efx.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/falcon/efx.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/falcon/falcon.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/falcon/nic.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/falcon/tx.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/farch.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/ptp.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sfc/tx.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/sun/niu.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/hamradio/yam.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/tun.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/vxlan.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/wireless/intel/iwlwifi/mvm/ops.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/wireless/intel/iwlwifi/mvm/tx.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/wireless/intel/iwlwifi/pcie/trans.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/wireless/mac80211_hwsim.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/s390/net/qeth_core_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/dynamic_queue_limits.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/of.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/rtnetlink.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/netfilter/nf_tables.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/events/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/dynamic_queue_limits.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/atm/mpc.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/pktgen.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/decnet/dn_route.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/inet_fragment.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/route.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/tcp_input.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/tcp_output.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/udp.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv6/ah6.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv6/esp6.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv6/ip6_tunnel.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/mac80211/sta_info.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/netfilter/ipvs/ip_vs_conn.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/netfilter/ipvs/ip_vs_ctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/wireless/nl80211.c	patch \|	diff1 \|	diff2 \|	blob \| history