Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
author     Linus Torvalds <torvalds@linux-foundation.org>
           Wed, 15 Nov 2017 19:56:19 +0000 (11:56 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Wed, 15 Nov 2017 19:56:19 +0000 (11:56 -0800)
Pull networking updates from David Miller:
 "Highlights:

   1) Maintain the TCP retransmit queue using an rbtree, with 1GB
      windows at 100Gb this really has become necessary. From Eric
      Dumazet.

   2) Multi-program support for cgroup+bpf, from Alexei Starovoitov.

   3) Perform broadcast flooding in hardware in mv88e6xxx, from Andrew
      Lunn.

   4) Add meter action support to openvswitch, from Andy Zhou.

   5) Add a data meta pointer for BPF accessible packets, from Daniel
      Borkmann.

   6) Namespace-ify almost all TCP sysctl knobs, from Eric Dumazet.

   7) Turn on Broadcom Tags in b53 driver, from Florian Fainelli.

   8) More work to move the RTNL mutex down, from Florian Westphal.

   9) Add 'bpftool' utility, to help with bpf program introspection.
      From Jakub Kicinski.

  10) Add new 'cpumap' type for XDP_REDIRECT action, from Jesper
      Dangaard Brouer.

  11) Support 'blocks' of transformations in the packet scheduler which
      can span multiple network devices, from Jiri Pirko.

  12) TC flower offload support in cxgb4, from Kumar Sanghvi.

  13) Priority based stream scheduler for SCTP, from Marcelo Ricardo
      Leitner.

  14) Thunderbolt networking driver, from Amir Levy and Mika Westerberg.

  15) Add RED qdisc offloadability, and use it in mlxsw driver. From
      Nogah Frankel.

  16) eBPF based device controller for cgroup v2, from Roman Gushchin.

  17) Add some fundamental tracepoints for TCP, from Song Liu.

  18) Remove garbage collection from ipv6 route layer, this is a
      significant accomplishment. From Wei Wang.

  19) Add multicast route offload support to mlxsw, from Yotam Gigi"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2177 commits)
  tcp: highest_sack fix
  geneve: fix fill_info when link down
  bpf: fix lockdep splat
  net: cdc_ncm: GetNtbFormat endian fix
  openvswitch: meter: fix NULL pointer dereference in ovs_meter_cmd_reply_start
  netem: remove unnecessary 64 bit modulus
  netem: use 64 bit divide by rate
  tcp: Namespace-ify sysctl_tcp_default_congestion_control
  net: Protect iterations over net::fib_notifier_ops in fib_seq_sum()
  ipv6: set all.accept_dad to 0 by default
  uapi: fix linux/tls.h userspace compilation error
  usbnet: ipheth: prevent TX queue timeouts when device not ready
  vhost_net: conditionally enable tx polling
  uapi: fix linux/rxrpc.h userspace compilation errors
  net: stmmac: fix LPI transitioning for dwmac4
  atm: horizon: Fix irq release error
  net-sysfs: trigger netlink notification on ifalias change via sysfs
  openvswitch: Using kfree_rcu() to simplify the code
  openvswitch: Make local function ovs_nsh_key_attr_size() static
  openvswitch: Fix return value check in ovs_meter_cmd_features()
  ...

55 files changed:
MAINTAINERS
drivers/atm/idt77105.c
drivers/atm/iphase.c
drivers/net/bonding/bond_main.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/igb/e1000_regs.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/falcon/efx.c
drivers/net/ethernet/sfc/falcon/falcon.c
drivers/net/ethernet/sfc/falcon/nic.h
drivers/net/ethernet/sfc/falcon/tx.c
drivers/net/ethernet/sfc/farch.c
drivers/net/ethernet/sfc/ptp.c
drivers/net/ethernet/sfc/tx.c
drivers/net/ethernet/sun/niu.c
drivers/net/hamradio/yam.c
drivers/net/tun.c
drivers/net/vxlan.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/intel/iwlwifi/mvm/ops.c
drivers/net/wireless/intel/iwlwifi/mvm/tx.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
drivers/net/wireless/mac80211_hwsim.c
drivers/s390/net/qeth_core_main.c
include/linux/dynamic_queue_limits.h
include/linux/of.h
include/linux/rtnetlink.h
include/net/netfilter/nf_tables.h
kernel/events/core.c
lib/dynamic_queue_limits.c
net/atm/mpc.c
net/core/dev.c
net/core/pktgen.c
net/decnet/dn_route.c
net/ipv4/inet_fragment.c
net/ipv4/route.c
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/ipv4/udp.c
net/ipv6/ah6.c
net/ipv6/esp6.c
net/ipv6/ip6_tunnel.c
net/mac80211/sta_info.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_ctl.c
net/wireless/nl80211.c

diff --combined MAINTAINERS
index 7e9c887ad951215464705d112377e4e2266a9d76,29aa89a1837bcd089db9388180cc19fb68324562..16e1e6dc89f253338e8307fc9ff296acbb98b8d1
@@@ -527,6 -527,11 +527,6 @@@ W:        http://ez.analog.com/community/linux
  S:    Supported
  F:    drivers/input/misc/adxl34x.c
  
 -AEDSP16 DRIVER
 -M:    Riccardo Facchetti <fizban@tin.it>
 -S:    Maintained
 -F:    sound/oss/aedsp16.c
 -
  AF9013 MEDIA DRIVER
  M:    Antti Palosaari <crope@iki.fi>
  L:    linux-media@vger.kernel.org
@@@ -695,9 -700,9 +695,9 @@@ F: include/linux/altera_uart.
  F:    include/linux/altera_jtaguart.h
  
  AMAZON ETHERNET DRIVERS
- M:    Netanel Belgazal <netanel@annapurnalabs.com>
- R:    Saeed Bishara <saeed@annapurnalabs.com>
- R:    Zorik Machulsky <zorik@annapurnalabs.com>
+ M:    Netanel Belgazal <netanel@amazon.com>
+ R:    Saeed Bishara <saeedb@amazon.com>
+ R:    Zorik Machulsky <zorik@amazon.com>
  L:    netdev@vger.kernel.org
  S:    Supported
  F:    Documentation/networking/ena.txt
@@@ -2024,7 -2029,6 +2024,7 @@@ M:      Masahiro Yamada <yamada.masahiro@soc
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
  S:    Maintained
 +F:    Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
  F:    arch/arm/boot/dts/uniphier*
  F:    arch/arm/include/asm/hardware/cache-uniphier.h
  F:    arch/arm/mach-uniphier/
@@@ -2032,7 -2036,6 +2032,7 @@@ F:      arch/arm/mm/cache-uniphier.
  F:    arch/arm64/boot/dts/socionext/
  F:    drivers/bus/uniphier-system-bus.c
  F:    drivers/clk/uniphier/
 +F:    drivers/gpio/gpio-uniphier.c
  F:    drivers/i2c/busses/i2c-uniphier*
  F:    drivers/irqchip/irq-uniphier-aidet.c
  F:    drivers/pinctrl/uniphier/
@@@ -2244,7 -2247,7 +2244,7 @@@ F:      include/linux/dmaengine.
  F:    include/linux/async_tx.h
  
  AT24 EEPROM DRIVER
 -M:    Wolfram Sang <wsa@the-dreams.de>
 +M:    Bartosz Golaszewski <brgl@bgdev.pl>
  L:    linux-i2c@vger.kernel.org
  S:    Maintained
  F:    drivers/misc/eeprom/at24.c
@@@ -2559,12 -2562,10 +2559,12 @@@ S:   Maintaine
  F:    drivers/net/hamradio/baycom*
  
  BCACHE (BLOCK LAYER CACHE)
 +M:    Michael Lyle <mlyle@lyle.org>
  M:    Kent Overstreet <kent.overstreet@gmail.com>
  L:    linux-bcache@vger.kernel.org
  W:    http://bcache.evilpiepirate.org
 -S:    Orphan
 +C:    irc://irc.oftc.net/bcache
 +S:    Maintained
  F:    drivers/md/bcache/
  
  BDISP ST MEDIA DRIVER
@@@ -2712,6 -2713,7 +2712,7 @@@ L:      linux-kernel@vger.kernel.or
  S:    Supported
  F:    arch/x86/net/bpf_jit*
  F:    Documentation/networking/filter.txt
+ F:    Documentation/bpf/
  F:    include/linux/bpf*
  F:    include/linux/filter.h
  F:    include/uapi/linux/bpf*
@@@ -2724,7 -2726,7 +2725,7 @@@ F:      net/core/filter.
  F:    net/sched/act_bpf.c
  F:    net/sched/cls_bpf.c
  F:    samples/bpf/
- F:    tools/net/bpf*
+ F:    tools/bpf/
  F:    tools/testing/selftests/bpf/
  
  BROADCOM B44 10/100 ETHERNET DRIVER
@@@ -2895,14 -2897,8 +2896,15 @@@ S:    Supporte
  F:    drivers/gpio/gpio-brcmstb.c
  F:    Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
  
 +BROADCOM BRCMSTB USB2 and USB3 PHY DRIVER
 +M:    Al Cooper <alcooperx@gmail.com>
 +L:    linux-kernel@vger.kernel.org
 +L:    bcm-kernel-feedback-list@broadcom.com
 +S:    Maintained
 +F:    drivers/phy/broadcom/phy-brcm-usb*
 +
  BROADCOM GENET ETHERNET DRIVER
+ M:    Doug Berger <opendmb@gmail.com>
  M:    Florian Fainelli <f.fainelli@gmail.com>
  L:    netdev@vger.kernel.org
  S:    Supported
@@@ -3088,7 -3084,6 +3090,6 @@@ F:      arch/c6x
  
  CA8210 IEEE-802.15.4 RADIO DRIVER
  M:    Harry Morris <h.morris@cascoda.com>
- M:    linuxdev@cascoda.com
  L:    linux-wpan@vger.kernel.org
  W:    https://github.com/Cascoda/ca8210-linux.git
  S:    Maintained
@@@ -3335,17 -3330,22 +3336,22 @@@ S:   Maintaine
  F:    drivers/auxdisplay/cfag12864bfb.c
  F:    include/linux/cfag12864b.h
  
- CFG80211 and NL80211
+ 802.11 (including CFG80211/NL80211)
  M:    Johannes Berg <johannes@sipsolutions.net>
  L:    linux-wireless@vger.kernel.org
  W:    http://wireless.kernel.org/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
  S:    Maintained
+ F:    net/wireless/
  F:    include/uapi/linux/nl80211.h
+ F:    include/linux/ieee80211.h
+ F:    include/net/wext.h
  F:    include/net/cfg80211.h
- F:    net/wireless/*
- X:    net/wireless/wext*
+ F:    include/net/iw_handler.h
+ F:    include/net/ieee80211_radiotap.h
+ F:    Documentation/driver-api/80211/cfg80211.rst
+ F:    Documentation/networking/regulatory.txt
  
  CHAR and MISC DRIVERS
  M:    Arnd Bergmann <arnd@arndb.de>
@@@ -3421,7 -3421,7 +3427,7 @@@ F:      drivers/scsi/snic
  CISCO VIC ETHERNET NIC DRIVER
  M:    Christian Benvenuti <benve@cisco.com>
  M:    Govindarajulu Varadarajan <_govind@gmx.com>
- M:    Neel Patel <neepatel@cisco.com>
+ M:    Parvi Kaustubhi <pkaustub@cisco.com>
  S:    Supported
  F:    drivers/net/ethernet/cisco/enic/
  
@@@ -3450,8 -3450,7 +3456,8 @@@ M:      Thomas Gleixner <tglx@linutronix.de
  L:    linux-kernel@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
  S:    Supported
 -F:    drivers/clocksource
 +F:    drivers/clocksource/
 +F:    Documentation/devicetree/bindings/timer/
  
  CMPC ACPI DRIVER
  M:    Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
@@@ -3472,7 -3471,7 +3478,7 @@@ COCCINELLE/Semantic Patches (SmPL
  M:    Julia Lawall <Julia.Lawall@lip6.fr>
  M:    Gilles Muller <Gilles.Muller@lip6.fr>
  M:    Nicolas Palix <nicolas.palix@imag.fr>
 -M:    Michal Marek <mmarek@suse.com>
 +M:    Michal Marek <michal.lkml@markovi.net>
  L:    cocci@systeme.lip6.fr (moderated for non-subscribers)
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
  W:    http://coccinelle.lip6.fr/
@@@ -3643,8 -3642,6 +3649,8 @@@ F:      drivers/cpufreq/arm_big_little_dt.
  
  CPU POWER MONITORING SUBSYSTEM
  M:    Thomas Renninger <trenn@suse.com>
 +M:    Shuah Khan <shuahkh@osg.samsung.com>
 +M:    Shuah Khan <shuah@kernel.org>
  L:    linux-pm@vger.kernel.org
  S:    Maintained
  F:    tools/power/cpupower/
@@@ -4100,8 -4097,6 +4106,8 @@@ T:      git git://git.kernel.org/pub/scm/lin
  T:    quilt http://people.redhat.com/agk/patches/linux/editing/
  S:    Maintained
  F:    Documentation/device-mapper/
 +F:    drivers/md/Makefile
 +F:    drivers/md/Kconfig
  F:    drivers/md/dm*
  F:    drivers/md/persistent-data/
  F:    include/linux/device-mapper.h
@@@ -4245,7 -4240,7 +4251,7 @@@ S:      Maintaine
  F:    drivers/dma/
  F:    include/linux/dmaengine.h
  F:    Documentation/devicetree/bindings/dma/
 -F:    Documentation/dmaengine/
 +F:    Documentation/driver-api/dmaengine/
  T:    git git://git.infradead.org/users/vkoul/slave-dma.git
  
  DMA MAPPING HELPERS
@@@ -4917,19 -4912,13 +4923,19 @@@ L:   linux-edac@vger.kernel.or
  S:    Maintained
  F:    drivers/edac/highbank*
  
 -EDAC-CAVIUM
 +EDAC-CAVIUM OCTEON
  M:    Ralf Baechle <ralf@linux-mips.org>
  M:    David Daney <david.daney@cavium.com>
  L:    linux-edac@vger.kernel.org
  L:    linux-mips@linux-mips.org
  S:    Supported
  F:    drivers/edac/octeon_edac*
 +
 +EDAC-CAVIUM THUNDERX
 +M:    David Daney <david.daney@cavium.com>
 +M:    Jan Glauber <jglauber@cavium.com>
 +L:    linux-edac@vger.kernel.org
 +S:    Supported
  F:    drivers/edac/thunderx_edac*
  
  EDAC-CORE
@@@ -5230,7 -5219,8 +5236,7 @@@ F:      fs/ext4
  
  Extended Verification Module (EVM)
  M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
 -L:    linux-ima-devel@lists.sourceforge.net
 -L:    linux-security-module@vger.kernel.org
 +L:    linux-integrity@vger.kernel.org
  S:    Supported
  F:    security/integrity/evm/
  
@@@ -5485,7 -5475,7 +5491,7 @@@ F:      include/uapi/linux/fb.
  
  FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
  M:    Horia Geantă <horia.geanta@nxp.com>
 -M:    Dan Douglass <dan.douglass@nxp.com>
 +M:    Aymen Sghaier <aymen.sghaier@nxp.com>
  L:    linux-crypto@vger.kernel.org
  S:    Maintained
  F:    drivers/crypto/caam/
@@@ -5665,7 -5655,6 +5671,7 @@@ T:      git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    fs/crypto/
  F:    include/linux/fscrypt*.h
 +F:    Documentation/filesystems/fscrypt.rst
  
  FUJITSU FR-V (FRV) PORT
  S:    Orphan
@@@ -6259,13 -6248,6 +6265,13 @@@ S:    Maintaine
  F:    drivers/net/ethernet/hisilicon/
  F:    Documentation/devicetree/bindings/net/hisilicon*.txt
  
 +HISILICON PMU DRIVER
 +M:    Shaokun Zhang <zhangshaokun@hisilicon.com>
 +W:    http://www.hisilicon.com
 +S:    Supported
 +F:    drivers/perf/hisilicon
 +F:    Documentation/perf/hisi-pmu.txt
 +
  HISILICON ROCE DRIVER
  M:    Lijun Ou <oulijun@huawei.com>
  M:    Wei Hu(Xavier) <xavier.huwei@huawei.com>
@@@ -6865,7 -6847,9 +6871,7 @@@ L:      linux-crypto@vger.kernel.or
  INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
  M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
  M:    Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
 -L:    linux-ima-devel@lists.sourceforge.net
 -L:    linux-ima-user@lists.sourceforge.net
 -L:    linux-security-module@vger.kernel.org
 +L:    linux-integrity@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
  S:    Supported
  F:    security/integrity/ima/
@@@ -7455,8 -7439,10 +7461,8 @@@ F:     mm/kasan
  F:    scripts/Makefile.kasan
  
  KCONFIG
 -M:    "Yann E. MORIN" <yann.morin.1998@free.fr>
  L:    linux-kbuild@vger.kernel.org
 -T:    git git://gitorious.org/linux-kconfig/linux-kconfig
 -S:    Maintained
 +S:    Orphan
  F:    Documentation/kbuild/kconfig-language.txt
  F:    scripts/kconfig/
  
@@@ -7485,7 -7471,7 +7491,7 @@@ F:      fs/autofs4
  
  KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
  M:    Masahiro Yamada <yamada.masahiro@socionext.com>
 -M:    Michal Marek <mmarek@suse.com>
 +M:    Michal Marek <michal.lkml@markovi.net>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
  L:    linux-kbuild@vger.kernel.org
  S:    Maintained
@@@ -7646,7 -7632,8 +7652,7 @@@ F:      kernel/kexec
  
  KEYS-ENCRYPTED
  M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
 -M:    David Safford <safford@us.ibm.com>
 -L:    linux-security-module@vger.kernel.org
 +L:    linux-integrity@vger.kernel.org
  L:    keyrings@vger.kernel.org
  S:    Supported
  F:    Documentation/security/keys/trusted-encrypted.rst
@@@ -7654,8 -7641,9 +7660,8 @@@ F:      include/keys/encrypted-type.
  F:    security/keys/encrypted-keys/
  
  KEYS-TRUSTED
 -M:    David Safford <safford@us.ibm.com>
  M:    Mimi Zohar <zohar@linux.vnet.ibm.com>
 -L:    linux-security-module@vger.kernel.org
 +L:    linux-integrity@vger.kernel.org
  L:    keyrings@vger.kernel.org
  S:    Supported
  F:    Documentation/security/keys/trusted-encrypted.rst
@@@ -8231,6 -8219,7 +8237,7 @@@ F:      Documentation/networking/mac80211-in
  F:    include/net/mac80211.h
  F:    net/mac80211/
  F:    drivers/net/wireless/mac80211_hwsim.[ch]
+ F:    Documentation/networking/mac80211_hwsim/README
  
  MAILBOX API
  M:    Jassi Brar <jassisinghbrar@gmail.com>
@@@ -9222,6 -9211,12 +9229,6 @@@ F:     include/linux/dt-bindings/mux
  F:    include/linux/mux/
  F:    drivers/mux/
  
 -MULTISOUND SOUND DRIVER
 -M:    Andrew Veliath <andrewtv@usa.net>
 -S:    Maintained
 -F:    Documentation/sound/oss/MultiSound
 -F:    sound/oss/msnd*
 -
  MULTITECH MULTIPORT CARD (ISICOM)
  S:    Orphan
  F:    drivers/tty/isicom.c
@@@ -9425,6 -9420,7 +9432,7 @@@ M:      Florian Fainelli <f.fainelli@gmail.c
  S:    Maintained
  F:    net/dsa/
  F:    include/net/dsa.h
+ F:    include/linux/dsa/
  F:    drivers/net/dsa/
  
  NETWORKING [GENERAL]
@@@ -9445,8 -9441,8 +9453,8 @@@ F:      include/uapi/linux/in.
  F:    include/uapi/linux/net.h
  F:    include/uapi/linux/netdevice.h
  F:    include/uapi/linux/net_namespace.h
- F:    tools/net/
  F:    tools/testing/selftests/net/
+ F:    lib/net_utils.c
  F:    lib/random32.c
  
  NETWORKING [IPSEC]
@@@ -10048,11 -10044,7 +10056,11 @@@ T: git git://github.com/openrisc/linux.
  L:    openrisc@lists.librecores.org
  W:    http://openrisc.io
  S:    Maintained
 +F:    Documentation/devicetree/bindings/openrisc/
 +F:    Documentation/openrisc/
  F:    arch/openrisc/
 +F:    drivers/irqchip/irq-ompic.c
 +F:    drivers/irqchip/irq-or1k-*
  
  OPENVSWITCH
  M:    Pravin Shelar <pshelar@nicira.com>
@@@ -10070,7 -10062,7 +10078,7 @@@ M:   Stephen Boyd <sboyd@codeaurora.org
  L:    linux-pm@vger.kernel.org
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git
 -F:    drivers/base/power/opp/
 +F:    drivers/opp/
  F:    include/linux/pm_opp.h
  F:    Documentation/power/opp.txt
  F:    Documentation/devicetree/bindings/opp/
@@@ -11061,6 -11053,7 +11069,6 @@@ F:   drivers/mtd/nand/pxa3xx_nand.
  
  QAT DRIVER
  M:    Giovanni Cabiddu <giovanni.cabiddu@intel.com>
 -M:    Salvatore Benedetto <salvatore.benedetto@intel.com>
  L:    qat-linux@intel.com
  S:    Supported
  F:    drivers/crypto/qat/
@@@ -11520,6 -11513,7 +11528,7 @@@ T:   git git://git.kernel.org/pub/scm/lin
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
  S:    Maintained
  F:    Documentation/rfkill.txt
+ F:    Documentation/ABI/stable/sysfs-class-rfkill
  F:    net/rfkill/
  
  RHASHTABLE
@@@ -11541,16 -11535,6 +11550,16 @@@ S: Maintaine
  F:    drivers/mtd/nand/r852.c
  F:    drivers/mtd/nand/r852.h
  
 +RISC-V ARCHITECTURE
 +M:    Palmer Dabbelt <palmer@sifive.com>
 +M:    Albert Ou <albert@sifive.com>
 +L:    patches@groups.riscv.org
 +T:    git https://github.com/riscv/riscv-linux
 +S:    Supported
 +F:    arch/riscv/
 +K:    riscv
 +N:    riscv
 +
  ROCCAT DRIVERS
  M:    Stefan Achatz <erazor_de@users.sourceforge.net>
  W:    http://sourceforge.net/projects/roccat/
@@@ -11803,7 -11787,7 +11812,7 @@@ L:   linux-crypto@vger.kernel.or
  L:    linux-samsung-soc@vger.kernel.org
  S:    Maintained
  F:    drivers/crypto/exynos-rng.c
 -F:    Documentation/devicetree/bindings/rng/samsung,exynos-rng4.txt
 +F:    Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
  
  SAMSUNG FRAMEBUFFER DRIVER
  M:    Jingoo Han <jingoohan1@gmail.com>
@@@ -12086,15 -12070,10 +12095,15 @@@ L:        linux-mmc@vger.kernel.or
  S:    Maintained
  F:    drivers/mmc/host/sdhci-spear.c
  
 +SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) TI OMAP DRIVER
 +M:    Kishon Vijay Abraham I <kishon@ti.com>
 +L:    linux-mmc@vger.kernel.org
 +S:    Maintained
 +F:    drivers/mmc/host/sdhci-omap.c
 +
  SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
  M:    Scott Bauer <scott.bauer@intel.com>
  M:    Jonathan Derrick <jonathan.derrick@intel.com>
 -M:    Rafael Antognolli <rafael.antognolli@intel.com>
  L:    linux-block@vger.kernel.org
  S:    Supported
  F:    block/sed*
@@@ -12495,10 -12474,7 +12504,10 @@@ M: Shaohua Li <shli@kernel.org
  L:    linux-raid@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
  S:    Supported
 -F:    drivers/md/
 +F:    drivers/md/Makefile
 +F:    drivers/md/Kconfig
 +F:    drivers/md/md*
 +F:    drivers/md/raid*
  F:    include/linux/raid/
  F:    include/uapi/linux/raid/
  
@@@ -12951,16 -12927,9 +12960,16 @@@ F: arch/arc/plat-axs10
  F:    arch/arc/boot/dts/ax*
  F:    Documentation/devicetree/bindings/arc/axs10*
  
 +SYNOPSYS DESIGNWARE APB GPIO DRIVER
 +M:    Hoan Tran <hotran@apm.com>
 +L:    linux-gpio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/gpio/gpio-dwapb.c
 +F:    Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
 +
  SYNOPSYS DESIGNWARE DMAC DRIVER
  M:    Viresh Kumar <vireshk@kernel.org>
 -M:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 +R:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
  S:    Maintained
  F:    include/linux/dma/dw.h
  F:    include/linux/platform_data/dma-dw.h
@@@ -13343,6 -13312,15 +13352,15 @@@ M: Mika Westerberg <mika.westerberg@lin
  M:    Yehezkel Bernat <yehezkel.bernat@intel.com>
  S:    Maintained
  F:    drivers/thunderbolt/
+ F:    include/linux/thunderbolt.h
+ THUNDERBOLT NETWORK DRIVER
+ M:    Michael Jamet <michael.jamet@intel.com>
+ M:    Mika Westerberg <mika.westerberg@linux.intel.com>
+ M:    Yehezkel Bernat <yehezkel.bernat@intel.com>
+ L:    netdev@vger.kernel.org
+ S:    Maintained
+ F:    drivers/net/thunderbolt.c
  
  THUNDERX GPIO DRIVER
  M:    David Daney <david.daney@cavium.com>
@@@ -13799,7 -13777,7 +13817,7 @@@ UDRAW TABLE
  M:    Bastien Nocera <hadess@hadess.net>
  L:    linux-input@vger.kernel.org
  S:    Maintained
 -F:    drivers/hid/hid-udraw.c
 +F:    drivers/hid/hid-udraw-ps3.c
  
  UFS FILESYSTEM
  M:    Evgeniy Dushistov <dushistov@mail.ru>
@@@ -14322,12 -14300,15 +14340,15 @@@ S:        Maintaine
  F:    include/linux/virtio_vsock.h
  F:    include/uapi/linux/virtio_vsock.h
  F:    include/uapi/linux/vsockmon.h
+ F:    include/uapi/linux/vm_sockets_diag.h
+ F:    net/vmw_vsock/diag.c
  F:    net/vmw_vsock/af_vsock_tap.c
  F:    net/vmw_vsock/virtio_transport_common.c
  F:    net/vmw_vsock/virtio_transport.c
  F:    drivers/net/vsockmon.c
  F:    drivers/vhost/vsock.c
  F:    drivers/vhost/vsock.h
+ F:    tools/testing/vsock/
  
  VIRTIO CONSOLE DRIVER
  M:    Amit Shah <amit@kernel.org>
@@@ -14368,7 -14349,6 +14389,7 @@@ L:   virtualization@lists.linux-foundatio
  L:    kvm@vger.kernel.org
  S:    Supported
  F:    drivers/s390/virtio/
 +F:    arch/s390/include/uapi/asm/virtio-ccw.h
  
  VIRTIO GPU DRIVER
  M:    David Airlie <airlied@linux.ie>
@@@ -14591,7 -14571,6 +14612,6 @@@ L:   wil6210@qca.qualcomm.co
  S:    Supported
  W:    http://wireless.kernel.org/en/users/Drivers/wil6210
  F:    drivers/net/wireless/ath/wil6210/
- F:    include/uapi/linux/wil6210_uapi.h
  
  WIMAX STACK
  M:    Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
@@@ -14642,7 -14621,6 +14662,7 @@@ F:   Documentation/devicetree/bindings/ex
  F:    Documentation/devicetree/bindings/regulator/arizona-regulator.txt
  F:    Documentation/devicetree/bindings/mfd/arizona.txt
  F:    Documentation/devicetree/bindings/mfd/wm831x.txt
 +F:    Documentation/devicetree/bindings/sound/wlf,arizona.txt
  F:    arch/arm/mach-s3c64xx/mach-crag6410*
  F:    drivers/clk/clk-wm83*.c
  F:    drivers/extcon/extcon-arizona.c
diff --combined drivers/atm/idt77105.c
index 57af9fd198e4e756b4646e5153f3f51b6b741626,d781b3f87693855b0b8d07634ab07d11f868669b..909744eb7bab419eec2dc71e2c79c87231812ce3
@@@ -49,8 -49,8 +49,8 @@@ static void idt77105_stats_timer_func(u
  static void idt77105_restart_timer_func(unsigned long);
  
  
 -static DEFINE_TIMER(stats_timer, idt77105_stats_timer_func, 0, 0);
 -static DEFINE_TIMER(restart_timer, idt77105_restart_timer_func, 0, 0);
 +static DEFINE_TIMER(stats_timer, idt77105_stats_timer_func);
 +static DEFINE_TIMER(restart_timer, idt77105_restart_timer_func);
  static int start_timer = 1;
  static struct idt77105_priv *idt77105_all = NULL;
  
@@@ -306,11 -306,9 +306,9 @@@ static int idt77105_start(struct atm_de
        if (start_timer) {
                start_timer = 0;
                  
-               setup_timer(&stats_timer, idt77105_stats_timer_func, 0UL);
                stats_timer.expires = jiffies+IDT77105_STATS_TIMER_PERIOD;
                add_timer(&stats_timer);
                  
-               setup_timer(&restart_timer, idt77105_restart_timer_func, 0UL);
                restart_timer.expires = jiffies+IDT77105_RESTART_TIMER_PERIOD;
                add_timer(&restart_timer);
        }
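
The idt77105 hunks above come from the 4.15 timer API cleanup: DEFINE_TIMER() lost its expires and data arguments, so a statically defined timer no longer needs the redundant setup_timer() call before add_timer(). A minimal sketch of the two-argument form, using a hypothetical example_timer_func with the same unsigned long callback prototype this driver still uses at this point in the conversion series:

#include <linux/timer.h>
#include <linux/jiffies.h>

static void example_timer_func(unsigned long unused);

/* expires and data are no longer macro arguments; both start out as 0 */
static DEFINE_TIMER(example_timer, example_timer_func);

static void example_timer_func(unsigned long unused)
{
	/* periodic work would go here; re-arm one second from now */
	mod_timer(&example_timer, jiffies + HZ);
}

The first arming still happens at run time, as idt77105_start() does above, by setting expires and calling add_timer(), or simply by calling mod_timer().
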
diff --combined drivers/atm/iphase.c
index ad6b582c268e1c2dbc6f20051eb49170b18dd8be,a785c6e697579579e2009a1a1cdc32eeae5bcca4..12f646760b6827e3ddf8a63290cfb22a08e51d0b
@@@ -76,7 -76,7 +76,7 @@@ static IADEV *ia_dev[8]
  static struct atm_dev *_ia_dev[8];
  static int iadev_count;
  static void ia_led_timer(unsigned long arg);
 -static DEFINE_TIMER(ia_timer, ia_led_timer, 0, 0);
 +static DEFINE_TIMER(ia_timer, ia_led_timer);
  static int IA_TX_BUF = DFL_TX_BUFFERS, IA_TX_BUF_SZ = DFL_TX_BUF_SZ;
  static int IA_RX_BUF = DFL_RX_BUFFERS, IA_RX_BUF_SZ = DFL_RX_BUF_SZ;
  static uint IADebugFlag = /* IF_IADBG_ERR | IF_IADBG_CBR| IF_IADBG_INIT_ADAPTER
@@@ -880,7 -880,7 +880,7 @@@ static void ia_phy_write(struct iadev_p
  
  static void ia_suni_pm7345_init_ds3(struct iadev_priv *iadev)
  {
-       static const struct ia_reg suni_ds3_init [] = {
+       static const struct ia_reg suni_ds3_init[] = {
                { SUNI_DS3_FRM_INTR_ENBL,       0x17 },
                { SUNI_DS3_FRM_CFG,             0x01 },
                { SUNI_DS3_TRAN_CFG,            0x01 },
  
  static void ia_suni_pm7345_init_e3(struct iadev_priv *iadev)
  {
-       static const struct ia_reg suni_e3_init [] = {
+       static const struct ia_reg suni_e3_init[] = {
                { SUNI_E3_FRM_FRAM_OPTIONS,             0x04 },
                { SUNI_E3_FRM_MAINT_OPTIONS,            0x20 },
                { SUNI_E3_FRM_FRAM_INTR_ENBL,           0x1d },
  
  static void ia_suni_pm7345_init(struct iadev_priv *iadev)
  {
-       static const struct ia_reg suni_init [] = {
+       static const struct ia_reg suni_init[] = {
                /* Enable RSOP loss of signal interrupt. */
                { SUNI_INTR_ENBL,               0x28 },
                /* Clear error counters. */
diff --combined drivers/net/bonding/bond_main.c
index 08a4f57cf40966d1ca45cb51c1b8131aa80d65f2,99a3b0cd5bd690e04cec718739920f76890f3a8f..c669554d70bb7c7ba2fe3091ed1c58bd3026229f
@@@ -1167,7 -1167,7 +1167,7 @@@ static rx_handler_result_t bond_handle_
        slave = bond_slave_get_rcu(skb->dev);
        bond = slave->bond;
  
 -      recv_probe = ACCESS_ONCE(bond->recv_probe);
 +      recv_probe = READ_ONCE(bond->recv_probe);
        if (recv_probe) {
                ret = recv_probe(skb, bond, slave);
                if (ret == RX_HANDLER_CONSUMED) {
@@@ -1217,25 -1217,21 +1217,21 @@@ static enum netdev_lag_tx_type bond_lag
        }
  }
  
- static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave)
+ static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave,
+                                     struct netlink_ext_ack *extack)
  {
        struct netdev_lag_upper_info lag_upper_info;
-       int err;
  
        lag_upper_info.tx_type = bond_lag_tx_type(bond);
-       err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
-                                          &lag_upper_info);
-       if (err)
-               return err;
-       rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
-       return 0;
+       return netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
+                                           &lag_upper_info, extack);
  }
  
  static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
  {
        netdev_upper_dev_unlink(slave->dev, bond->dev);
        slave->dev->flags &= ~IFF_SLAVE;
-       rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
  }
  
  static struct slave *bond_alloc_slave(struct bonding *bond)
@@@ -1328,7 -1324,8 +1324,8 @@@ void bond_lower_state_changed(struct sl
  }
  
  /* enslave device <slave> to bond device <master> */
- int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+                struct netlink_ext_ack *extack)
  {
        struct bonding *bond = netdev_priv(bond_dev);
        const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
  
        /* already in-use? */
        if (netdev_is_rx_handler_busy(slave_dev)) {
+               NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved");
                netdev_err(bond_dev,
                           "Error: Device is in use and cannot be enslaved\n");
                return -EBUSY;
        }
  
        if (bond_dev == slave_dev) {
+               NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself.");
                netdev_err(bond_dev, "cannot enslave bond to itself.\n");
                return -EPERM;
        }
                netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n",
                           slave_dev->name);
                if (vlan_uses_dev(bond_dev)) {
+                       NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond");
                        netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",
                                   slave_dev->name, bond_dev->name);
                        return -EPERM;
         * enslaving it; the old ifenslave will not.
         */
        if (slave_dev->flags & IFF_UP) {
+               NL_SET_ERR_MSG(extack, "Device can not be enslaved while up");
                netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n",
                           slave_dev->name);
                return -EPERM;
                                                 bond_dev);
                }
        } else if (bond_dev->type != slave_dev->type) {
+               NL_SET_ERR_MSG(extack, "Device type is different from other slaves");
                netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n",
                           slave_dev->name, slave_dev->type, bond_dev->type);
                return -EINVAL;
  
        if (slave_dev->type == ARPHRD_INFINIBAND &&
            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+               NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves");
                netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n",
                            slave_dev->type);
                res = -EOPNOTSUPP;
                                bond->params.fail_over_mac = BOND_FOM_ACTIVE;
                                netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n");
                        } else {
+                               NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
                                netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
                                res = -EOPNOTSUPP;
                                goto err_undo_flags;
                goto err_detach;
        }
  
-       res = bond_master_upper_dev_link(bond, new_slave);
+       res = bond_master_upper_dev_link(bond, new_slave, extack);
        if (res) {
                netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res);
                goto err_unregister;
@@@ -2492,7 -2496,8 +2496,8 @@@ int bond_arp_rcv(const struct sk_buff *
        struct slave *curr_active_slave, *curr_arp_slave;
        unsigned char *arp_ptr;
        __be32 sip, tip;
-       int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+       int is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+       unsigned int alen;
  
        if (!slave_do_arp_validate(bond, slave)) {
                if ((slave_do_arp_validate_only(bond) && is_arp) ||
@@@ -3073,7 -3078,16 +3078,16 @@@ static int bond_slave_netdev_event(unsi
                break;
        case NETDEV_UP:
        case NETDEV_CHANGE:
-               bond_update_speed_duplex(slave);
+               /* For 802.3ad mode only:
+                * Getting invalid Speed/Duplex values here will put slave
+                * in weird state. So mark it as link-down for the time
+                * being and let link-monitoring (miimon) set it right when
+                * correct speeds/duplex are available.
+                */
+               if (bond_update_speed_duplex(slave) &&
+                   BOND_MODE(bond) == BOND_MODE_8023AD)
+                       slave->link = BOND_LINK_DOWN;
                if (BOND_MODE(bond) == BOND_MODE_8023AD)
                        bond_3ad_adapter_speed_duplex_changed(slave);
                /* Fallthrough */
@@@ -3483,7 -3497,7 +3497,7 @@@ static int bond_do_ioctl(struct net_dev
        switch (cmd) {
        case BOND_ENSLAVE_OLD:
        case SIOCBONDENSLAVE:
-               res = bond_enslave(bond_dev, slave_dev);
+               res = bond_enslave(bond_dev, slave_dev, NULL);
                break;
        case BOND_RELEASE_OLD:
        case SIOCBONDRELEASE:
@@@ -3811,7 -3825,7 +3825,7 @@@ static int bond_xmit_roundrobin(struct 
                else
                        bond_xmit_slave_id(bond, skb, 0);
        } else {
 -              int slave_cnt = ACCESS_ONCE(bond->slave_cnt);
 +              int slave_cnt = READ_ONCE(bond->slave_cnt);
  
                if (likely(slave_cnt)) {
                        slave_id = bond_rr_gen_slave_id(bond);
@@@ -3973,7 -3987,7 +3987,7 @@@ static int bond_3ad_xor_xmit(struct sk_
        unsigned int count;
  
        slaves = rcu_dereference(bond->slave_arr);
 -      count = slaves ? ACCESS_ONCE(slaves->count) : 0;
 +      count = slaves ? READ_ONCE(slaves->count) : 0;
        if (likely(count)) {
                slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
                bond_dev_queue_xmit(bond, skb, slave->dev);
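
The bonding changes above thread a struct netlink_ext_ack through bond_enslave() so each failure path can hand a human-readable reason back to userspace via NL_SET_ERR_MSG(), in addition to the existing netdev_err() log message. A minimal sketch of the pattern, with a hypothetical can_enslave() helper that is not part of the driver:

#include <linux/netdevice.h>
#include <linux/netlink.h>

/* hypothetical check: refuse devices whose rx_handler is already taken,
 * reporting the reason through the netlink extended ack when one exists */
static int can_enslave(struct net_device *slave_dev,
		       struct netlink_ext_ack *extack)
{
	if (netdev_is_rx_handler_busy(slave_dev)) {
		NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved");
		return -EBUSY;
	}
	return 0;
}

NL_SET_ERR_MSG() tolerates a NULL extack, which is why the legacy ioctl path above can keep calling bond_enslave(bond_dev, slave_dev, NULL).
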
diff --combined drivers/net/ethernet/chelsio/cxgb4/sge.c
index fe5cedd96a248da0f24dfd766b1398bbb1d5085a,486b01fe23bd6de0f44b2cbd7a81f71ada4c0ef8..922f2f93778930226c364dc743e0cc8429c54b0e
@@@ -405,7 -405,7 +405,7 @@@ void free_tx_desc(struct adapter *adap
   */
  static inline int reclaimable(const struct sge_txq *q)
  {
 -      int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
 +      int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
        hw_cidx -= q->cidx;
        return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
  }
@@@ -1375,7 -1375,7 +1375,7 @@@ out_free:       dev_kfree_skb_any(skb)
   */
  static inline void reclaim_completed_tx_imm(struct sge_txq *q)
  {
 -      int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
 +      int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
        int reclaim = hw_cidx - q->cidx;
  
        if (reclaim < 0)
@@@ -1537,13 -1537,7 +1537,13 @@@ int t4_mgmt_tx(struct adapter *adap, st
   */
  static inline int is_ofld_imm(const struct sk_buff *skb)
  {
 -      return skb->len <= MAX_IMM_TX_PKT_LEN;
 +      struct work_request_hdr *req = (struct work_request_hdr *)skb->data;
 +      unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));
 +
 +      if (opcode == FW_CRYPTO_LOOKASIDE_WR)
 +              return skb->len <= SGE_MAX_WR_LEN;
 +      else
 +              return skb->len <= MAX_IMM_TX_PKT_LEN;
  }
  
  /**
@@@ -2589,11 -2583,11 +2589,11 @@@ irq_handler_t t4_intr_handler(struct ad
        return t4_intr_intx;
  }
  
- static void sge_rx_timer_cb(unsigned long data)
+ static void sge_rx_timer_cb(struct timer_list *t)
  {
        unsigned long m;
        unsigned int i;
-       struct adapter *adap = (struct adapter *)data;
+       struct adapter *adap = from_timer(adap, t, sge.rx_timer);
        struct sge *s = &adap->sge;
  
        for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@@ -2626,11 -2620,11 +2626,11 @@@ done
        mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
  }
  
- static void sge_tx_timer_cb(unsigned long data)
+ static void sge_tx_timer_cb(struct timer_list *t)
  {
        unsigned long m;
        unsigned int i, budget;
-       struct adapter *adap = (struct adapter *)data;
+       struct adapter *adap = from_timer(adap, t, sge.tx_timer);
        struct sge *s = &adap->sge;
  
        for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@@ -3464,8 -3458,8 +3464,8 @@@ int t4_sge_init(struct adapter *adap
        /* Set up timers used for recuring callbacks to process RX and TX
         * administrative tasks.
         */
-       setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
-       setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
+       timer_setup(&s->rx_timer, sge_rx_timer_cb, 0);
+       timer_setup(&s->tx_timer, sge_tx_timer_cb, 0);
  
        spin_lock_init(&s->intrq_lock);
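
The cxgb4 hunks show the companion conversion for timers initialised at run time: setup_timer(&t, fn, (unsigned long)ctx) becomes timer_setup(&t, fn, 0), the callback receives a struct timer_list * instead of an opaque unsigned long, and from_timer() recovers the containing structure from the timer's field offset. A minimal sketch assuming a hypothetical struct my_adapter with an embedded poll_timer:

#include <linux/timer.h>
#include <linux/jiffies.h>

struct my_adapter {
	struct timer_list poll_timer;
	/* ... other adapter state ... */
};

static void my_poll_timer_cb(struct timer_list *t)
{
	/* from_timer() is container_of() keyed on the timer field name */
	struct my_adapter *adap = from_timer(adap, t, poll_timer);

	/* do the periodic work on adap, then re-arm */
	mod_timer(&adap->poll_timer, jiffies + HZ);
}

static void my_adapter_init(struct my_adapter *adap)
{
	timer_setup(&adap->poll_timer, my_poll_timer_cb, 0);
	mod_timer(&adap->poll_timer, jiffies + HZ);
}
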
  
diff --combined drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 2cb9539c931e51f7a18696db9a307e3709c6d2f4,2b8bbc84e34f2b055ae6d999aa13603748a308b5..4c3b4243cf652a2102ac0a7e6a21b2ac0e0386e3
@@@ -264,7 -264,7 +264,7 @@@ static void i40e_dbg_dump_vsi_seid(stru
                 vsi->rx_buf_failed, vsi->rx_page_failed);
        rcu_read_lock();
        for (i = 0; i < vsi->num_queue_pairs; i++) {
 -              struct i40e_ring *rx_ring = ACCESS_ONCE(vsi->rx_rings[i]);
 +              struct i40e_ring *rx_ring = READ_ONCE(vsi->rx_rings[i]);
  
                if (!rx_ring)
                        continue;
                         rx_ring->netdev,
                         rx_ring->rx_bi);
                dev_info(&pf->pdev->dev,
-                        "    rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
-                        i, rx_ring->state,
+                        "    rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+                        i, *rx_ring->state,
                         rx_ring->queue_index,
                         rx_ring->reg_idx);
                dev_info(&pf->pdev->dev,
                         ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
        }
        for (i = 0; i < vsi->num_queue_pairs; i++) {
 -              struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
 +              struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
  
                if (!tx_ring)
                        continue;
                         tx_ring->netdev,
                         tx_ring->tx_bi);
                dev_info(&pf->pdev->dev,
-                        "    tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
-                        i, tx_ring->state,
+                        "    tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+                        i, *tx_ring->state,
                         tx_ring->queue_index,
                         tx_ring->reg_idx);
                dev_info(&pf->pdev->dev,
@@@ -798,8 -798,7 +798,7 @@@ static ssize_t i40e_dbg_command_write(s
                 */
                if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
                        pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
-                       i40e_do_reset_safe(pf,
-                                          BIT_ULL(__I40E_PF_RESET_REQUESTED));
+                       i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
                }
  
                vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
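
Several driver hunks in this merge (bonding, cxgb4, and i40e above) also replace ACCESS_ONCE() with READ_ONCE() as part of the tree-wide retirement of ACCESS_ONCE. For scalar loads the intent is the same: force a single, non-torn read of a location that another context may be updating concurrently. A minimal sketch with a hypothetical shared ring pointer:

#include <linux/compiler.h>

struct example_ring;

/* hypothetical: published by the control path, consumed by the datapath */
static struct example_ring *shared_ring;

static struct example_ring *example_get_ring(void)
{
	/* READ_ONCE() stops the compiler from tearing or re-loading the
	 * pointer; pair with WRITE_ONCE() or RCU on the publishing side */
	return READ_ONCE(shared_ring);
}
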
diff --combined drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index e9e04a485e0a765e392afef2f943f8bd84bb3400,dc9b8dcf4a1ee1bca390d101a192a3b6467dfb8d..5f6cf7212d4fc230b11fc6fe79fb6427f2b8c2b0
@@@ -227,6 -227,8 +227,8 @@@ static const struct i40e_priv_flags i40
        I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
        I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
        I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
+       I40E_PRIV_FLAG("disable-source-pruning",
+                      I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
  };
  
  #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
@@@ -251,428 -253,557 +253,557 @@@ static void i40e_partition_setting_comp
  
  /**
   * i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes
-  * @phy_types: PHY types to convert
-  * @supported: pointer to the ethtool supported variable to fill in
-  * @advertising: pointer to the ethtool advertising variable to fill in
+  * @pf: PF struct with phy_types
+  * @ks: ethtool link ksettings struct to fill out
   *
   **/
- static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
-                                    u32 *advertising)
+ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
+                                    struct ethtool_link_ksettings *ks)
  {
        struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info;
        u64 phy_types = pf->hw.phy.phy_types;
  
-       *supported = 0x0;
-       *advertising = 0x0;
+       ethtool_link_ksettings_zero_link_mode(ks, supported);
+       ethtool_link_ksettings_zero_link_mode(ks, advertising);
  
        if (phy_types & I40E_CAP_PHY_TYPE_SGMII) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_1000baseT_Full;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       *advertising |= ADVERTISED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseT_Full);
                if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
-                       *supported |= SUPPORTED_100baseT_Full;
-                       *advertising |= ADVERTISED_100baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                            100baseT_Full);
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            100baseT_Full);
                }
        }
        if (phy_types & I40E_CAP_PHY_TYPE_XAUI ||
            phy_types & I40E_CAP_PHY_TYPE_XFI ||
            phy_types & I40E_CAP_PHY_TYPE_SFI ||
            phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC)
-               *supported |= SUPPORTED_10000baseT_Full;
-       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
-           phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_10000baseT_Full;
-               *advertising |= ADVERTISED_Autoneg;
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_T) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       *advertising |= ADVERTISED_10000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
        }
        if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
            phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
            phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
-               *supported |= SUPPORTED_40000baseCR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseCR4_Full);
        if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
            phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_40000baseCR4_Full;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseCR4_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB)
-                       *advertising |= ADVERTISED_40000baseCR4_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            40000baseCR4_Full);
        }
        if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_100baseT_Full;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    100baseT_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
-                       *advertising |= ADVERTISED_100baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            100baseT_Full);
        }
-       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
-           phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
-           phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
-           phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
-               *supported |= SUPPORTED_Autoneg |
-                             SUPPORTED_1000baseT_Full;
-               *advertising |= ADVERTISED_Autoneg;
+       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       *advertising |= ADVERTISED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseT_Full);
        }
        if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
-               *supported |= SUPPORTED_40000baseSR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseSR4_Full);
        if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
-               *supported |= SUPPORTED_40000baseLR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseLR4_Full);
        if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
-               *supported |= SUPPORTED_40000baseKR4_Full |
-                             SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_40000baseKR4_Full |
-                               ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseLR4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    40000baseLR4_Full);
        }
        if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
-               *supported |= SUPPORTED_20000baseKR2_Full |
-                             SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    20000baseKR2_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_20GB)
-                       *advertising |= ADVERTISED_20000baseKR2_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            20000baseKR2_Full);
        }
-       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
-               if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-                       *supported |= SUPPORTED_10000baseKR_Full |
-                                     SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseKX4_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-                               *advertising |= ADVERTISED_10000baseKR_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseKX4_Full);
        }
-       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
-               *supported |= SUPPORTED_10000baseKX4_Full |
-                             SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR &&
+           !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseKR_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       *advertising |= ADVERTISED_10000baseKX4_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseKR_Full);
        }
-       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
-               if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-                       *supported |= SUPPORTED_1000baseKX_Full |
-                                     SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX &&
+           !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseKX_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-                               *advertising |= ADVERTISED_1000baseKX_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseKX_Full);
        }
-       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
-           phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
-           phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+       /* need to add 25G PHY types */
+       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseKR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            25000baseKR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseCR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            25000baseCR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
            phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) {
-               *supported |= SUPPORTED_Autoneg;
-               *advertising |= ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseSR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            25000baseSR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseCR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            25000baseCR_Full);
+       }
+       /* need to add new 10G PHY types */
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseCR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseCR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseSR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseSR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseLR_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseLR_Full);
+       }
+       if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseX_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseX_Full);
+       }
+       /* Autoneg PHY types */
+       if (phy_types & I40E_CAP_PHY_TYPE_SGMII ||
+           phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4 ||
+           phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
+           phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4 ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+           phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+           phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
+           phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+           phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX ||
+           phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    Autoneg);
        }
  }
  
  /**
   * i40e_get_settings_link_up - Get the Link settings for when link is up
   * @hw: hw structure
-  * @ecmd: ethtool command to fill in
+  * @ks: ethtool ksettings to fill in
   * @netdev: network interface device structure
-  *
+  * @pf: pointer to physical function struct
   **/
  static void i40e_get_settings_link_up(struct i40e_hw *hw,
-                                     struct ethtool_link_ksettings *cmd,
+                                     struct ethtool_link_ksettings *ks,
                                      struct net_device *netdev,
                                      struct i40e_pf *pf)
  {
        struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+       struct ethtool_link_ksettings cap_ksettings;
        u32 link_speed = hw_link_info->link_speed;
-       u32 e_advertising = 0x0;
-       u32 e_supported = 0x0;
-       u32 supported, advertising;
-       ethtool_convert_link_mode_to_legacy_u32(&supported,
-                                               cmd->link_modes.supported);
-       ethtool_convert_link_mode_to_legacy_u32(&advertising,
-                                               cmd->link_modes.advertising);
  
        /* Initialize supported and advertised settings based on phy settings */
        switch (hw_link_info->phy_type) {
        case I40E_PHY_TYPE_40GBASE_CR4:
        case I40E_PHY_TYPE_40GBASE_CR4_CU:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_40000baseCR4_Full;
-               advertising = ADVERTISED_Autoneg |
-                             ADVERTISED_40000baseCR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseCR4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    40000baseCR4_Full);
                break;
        case I40E_PHY_TYPE_XLAUI:
        case I40E_PHY_TYPE_XLPPI:
        case I40E_PHY_TYPE_40GBASE_AOC:
-               supported = SUPPORTED_40000baseCR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseCR4_Full);
                break;
        case I40E_PHY_TYPE_40GBASE_SR4:
-               supported = SUPPORTED_40000baseSR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseSR4_Full);
                break;
        case I40E_PHY_TYPE_40GBASE_LR4:
-               supported = SUPPORTED_40000baseLR4_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseLR4_Full);
                break;
+       case I40E_PHY_TYPE_25GBASE_SR:
+       case I40E_PHY_TYPE_25GBASE_LR:
        case I40E_PHY_TYPE_10GBASE_SR:
        case I40E_PHY_TYPE_10GBASE_LR:
        case I40E_PHY_TYPE_1000BASE_SX:
        case I40E_PHY_TYPE_1000BASE_LX:
-               supported = SUPPORTED_10000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    25000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseLR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseLR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseX_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    1000baseX_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
                if (hw_link_info->module_type[2] &
                    I40E_MODULE_TYPE_1000BASE_SX ||
                    hw_link_info->module_type[2] &
                    I40E_MODULE_TYPE_1000BASE_LX) {
-                       supported |= SUPPORTED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                            1000baseT_Full);
                        if (hw_link_info->requested_speeds &
                            I40E_LINK_SPEED_1GB)
-                               advertising |= ADVERTISED_1000baseT_Full;
+                               ethtool_link_ksettings_add_link_mode(
+                                    ks, advertising, 1000baseT_Full);
                }
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       advertising |= ADVERTISED_10000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
                break;
        case I40E_PHY_TYPE_10GBASE_T:
        case I40E_PHY_TYPE_1000BASE_T:
        case I40E_PHY_TYPE_100BASE_TX:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_10000baseT_Full |
-                           SUPPORTED_1000baseT_Full |
-                           SUPPORTED_100baseT_Full;
-               advertising = ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    100baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-                       advertising |= ADVERTISED_10000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       advertising |= ADVERTISED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseT_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
-                       advertising |= ADVERTISED_100baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            100baseT_Full);
                break;
        case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_1000baseT_Full;
-               advertising = ADVERTISED_Autoneg |
-                             ADVERTISED_1000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    1000baseT_Full);
                break;
        case I40E_PHY_TYPE_10GBASE_CR1_CU:
        case I40E_PHY_TYPE_10GBASE_CR1:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_10000baseT_Full;
-               advertising = ADVERTISED_Autoneg |
-                             ADVERTISED_10000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseT_Full);
                break;
        case I40E_PHY_TYPE_XAUI:
        case I40E_PHY_TYPE_XFI:
        case I40E_PHY_TYPE_SFI:
        case I40E_PHY_TYPE_10GBASE_SFPP_CU:
        case I40E_PHY_TYPE_10GBASE_AOC:
-               supported = SUPPORTED_10000baseT_Full;
-               advertising = SUPPORTED_10000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseT_Full);
+               if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            10000baseT_Full);
                break;
        case I40E_PHY_TYPE_SGMII:
-               supported = SUPPORTED_Autoneg |
-                           SUPPORTED_1000baseT_Full;
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseT_Full);
                if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-                       advertising |= ADVERTISED_1000baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                            1000baseT_Full);
                if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
-                       supported |= SUPPORTED_100baseT_Full;
+                       ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                            100baseT_Full);
                        if (hw_link_info->requested_speeds &
                            I40E_LINK_SPEED_100MB)
-                               advertising |= ADVERTISED_100baseT_Full;
+                               ethtool_link_ksettings_add_link_mode(
+                                     ks, advertising, 100baseT_Full);
                }
                break;
        case I40E_PHY_TYPE_40GBASE_KR4:
+       case I40E_PHY_TYPE_25GBASE_KR:
        case I40E_PHY_TYPE_20GBASE_KR2:
        case I40E_PHY_TYPE_10GBASE_KR:
        case I40E_PHY_TYPE_10GBASE_KX4:
        case I40E_PHY_TYPE_1000BASE_KX:
-               supported |= SUPPORTED_40000baseKR4_Full |
-                            SUPPORTED_20000baseKR2_Full |
-                            SUPPORTED_10000baseKR_Full |
-                            SUPPORTED_10000baseKX4_Full |
-                            SUPPORTED_1000baseKX_Full |
-                            SUPPORTED_Autoneg;
-               advertising |= ADVERTISED_40000baseKR4_Full |
-                              ADVERTISED_20000baseKR2_Full |
-                              ADVERTISED_10000baseKR_Full |
-                              ADVERTISED_10000baseKX4_Full |
-                              ADVERTISED_1000baseKX_Full |
-                              ADVERTISED_Autoneg;
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    40000baseKR4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseKR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    20000baseKR2_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseKR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseKX4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    1000baseKX_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    40000baseKR4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    25000baseKR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    20000baseKR2_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseKR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseKX4_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    1000baseKX_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
                break;
-       case I40E_PHY_TYPE_25GBASE_KR:
        case I40E_PHY_TYPE_25GBASE_CR:
-       case I40E_PHY_TYPE_25GBASE_SR:
-       case I40E_PHY_TYPE_25GBASE_LR:
-               supported = SUPPORTED_Autoneg;
-               advertising = ADVERTISED_Autoneg;
-               /* TODO: add speeds when ethtool is ready to support*/
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseCR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    25000baseCR_Full);
+               break;
+       case I40E_PHY_TYPE_25GBASE_AOC:
+       case I40E_PHY_TYPE_25GBASE_ACC:
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    25000baseCR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    25000baseCR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, supported,
+                                                    10000baseCR_Full);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
+                                                    10000baseCR_Full);
                break;
        default:
                /* if we got here and link is up something bad is afoot */
-               netdev_info(netdev, "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+               netdev_info(netdev,
+                           "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
                            hw_link_info->phy_type);
        }
  
        /* Now that we've worked out everything that could be supported by the
-        * current PHY type, get what is supported by the NVM and them to
-        * get what is truly supported
+        * current PHY type, get what is supported by the NVM and intersect
+        * them to get what is truly supported
         */
-       i40e_phy_type_to_ethtool(pf, &e_supported,
-                                &e_advertising);
-       supported = supported & e_supported;
-       advertising = advertising & e_advertising;
+       memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings));
+       i40e_phy_type_to_ethtool(pf, &cap_ksettings);
+       ethtool_intersect_link_masks(ks, &cap_ksettings);
  
        /* Set speed and duplex */
        switch (link_speed) {
        case I40E_LINK_SPEED_40GB:
-               cmd->base.speed = SPEED_40000;
+               ks->base.speed = SPEED_40000;
                break;
        case I40E_LINK_SPEED_25GB:
- #ifdef SPEED_25000
-               cmd->base.speed = SPEED_25000;
- #else
-               netdev_info(netdev,
-                           "Speed is 25G, display not supported by this version of ethtool.\n");
- #endif
+               ks->base.speed = SPEED_25000;
                break;
        case I40E_LINK_SPEED_20GB:
-               cmd->base.speed = SPEED_20000;
+               ks->base.speed = SPEED_20000;
                break;
        case I40E_LINK_SPEED_10GB:
-               cmd->base.speed = SPEED_10000;
+               ks->base.speed = SPEED_10000;
                break;
        case I40E_LINK_SPEED_1GB:
-               cmd->base.speed = SPEED_1000;
+               ks->base.speed = SPEED_1000;
                break;
        case I40E_LINK_SPEED_100MB:
-               cmd->base.speed = SPEED_100;
+               ks->base.speed = SPEED_100;
                break;
        default:
                break;
        }
-       cmd->base.duplex = DUPLEX_FULL;
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-                                               supported);
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-                                               advertising);
+       ks->base.duplex = DUPLEX_FULL;
  }
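
The ethtool_intersect_link_masks() call above replaces the old u32 AND of supported/advertising with an AND over the full link-mode bitmaps, keeping only the modes present both in the PHY-type table and in the NVM capabilities. A rough single-file sketch of that intersection, assuming a simple two-mask layout rather than the kernel's struct:

/* sketch_intersect.c -- illustrative only, not part of this commit */
#include <stdint.h>
#include <stdio.h>

#define LINK_MODE_NBITS   128			/* illustrative width */
#define LINK_MODE_NWORDS  (LINK_MODE_NBITS / 32)

struct link_masks {
	uint32_t supported[LINK_MODE_NWORDS];
	uint32_t advertising[LINK_MODE_NWORDS];
};

/* keep only the modes present in both dst and src, word by word */
static void intersect_link_masks(struct link_masks *dst,
				 const struct link_masks *src)
{
	for (int i = 0; i < LINK_MODE_NWORDS; i++) {
		dst->supported[i] &= src->supported[i];
		dst->advertising[i] &= src->advertising[i];
	}
}

int main(void)
{
	struct link_masks phy = { .supported = { 0x0f }, .advertising = { 0x05 } };
	struct link_masks nvm = { .supported = { 0x06 }, .advertising = { 0x06 } };

	intersect_link_masks(&phy, &nvm);
	printf("supported=0x%x advertising=0x%x\n",
	       phy.supported[0], phy.advertising[0]);	/* prints 0x6 and 0x4 */
	return 0;
}
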
  
  /**
   * i40e_get_settings_link_down - Get the Link settings for when link is down
   * @hw: hw structure
-  * @ecmd: ethtool command to fill in
+  * @ks: ethtool ksettings to fill in
+  * @pf: pointer to physical function struct
   *
   * Reports link settings that can be determined when link is down
   **/
  static void i40e_get_settings_link_down(struct i40e_hw *hw,
-                                       struct ethtool_link_ksettings *cmd,
+                                       struct ethtool_link_ksettings *ks,
                                        struct i40e_pf *pf)
  {
-       u32 supported, advertising;
        /* link is down and the driver needs to fall back on
         * supported phy types to figure out what info to display
         */
-       i40e_phy_type_to_ethtool(pf, &supported, &advertising);
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-                                               supported);
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-                                               advertising);
+       i40e_phy_type_to_ethtool(pf, ks);
  
        /* With no link, speed and duplex are unknown */
-       cmd->base.speed = SPEED_UNKNOWN;
-       cmd->base.duplex = DUPLEX_UNKNOWN;
+       ks->base.speed = SPEED_UNKNOWN;
+       ks->base.duplex = DUPLEX_UNKNOWN;
  }
  
  /**
-  * i40e_get_settings - Get Link Speed and Duplex settings
+  * i40e_get_link_ksettings - Get Link Speed and Duplex settings
   * @netdev: network interface device structure
-  * @ecmd: ethtool command
+  * @ks: ethtool ksettings
   *
   * Reports speed/duplex settings based on media_type
   **/
  static int i40e_get_link_ksettings(struct net_device *netdev,
-                                  struct ethtool_link_ksettings *cmd)
+                                  struct ethtool_link_ksettings *ks)
  {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_pf *pf = np->vsi->back;
        struct i40e_hw *hw = &pf->hw;
        struct i40e_link_status *hw_link_info = &hw->phy.link_info;
        bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
-       u32 advertising;
+       ethtool_link_ksettings_zero_link_mode(ks, supported);
+       ethtool_link_ksettings_zero_link_mode(ks, advertising);
  
        if (link_up)
-               i40e_get_settings_link_up(hw, cmd, netdev, pf);
+               i40e_get_settings_link_up(hw, ks, netdev, pf);
        else
-               i40e_get_settings_link_down(hw, cmd, pf);
+               i40e_get_settings_link_down(hw, ks, pf);
  
        /* Now set the settings that don't rely on link being up/down */
        /* Set autoneg settings */
-       cmd->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
-                         AUTONEG_ENABLE : AUTONEG_DISABLE);
+       ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+                           AUTONEG_ENABLE : AUTONEG_DISABLE);
  
+       /* Set media type settings */
        switch (hw->phy.media_type) {
        case I40E_MEDIA_TYPE_BACKPLANE:
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    Autoneg);
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    Backplane);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    Autoneg);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
                                                     Backplane);
-               cmd->base.port = PORT_NONE;
+               ks->base.port = PORT_NONE;
                break;
        case I40E_MEDIA_TYPE_BASET:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
-               cmd->base.port = PORT_TP;
+               ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+               ks->base.port = PORT_TP;
                break;
        case I40E_MEDIA_TYPE_DA:
        case I40E_MEDIA_TYPE_CX4:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
-               cmd->base.port = PORT_DA;
+               ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+               ks->base.port = PORT_DA;
                break;
        case I40E_MEDIA_TYPE_FIBER:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-               cmd->base.port = PORT_FIBRE;
+               ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+               ks->base.port = PORT_FIBRE;
                break;
        case I40E_MEDIA_TYPE_UNKNOWN:
        default:
-               cmd->base.port = PORT_OTHER;
+               ks->base.port = PORT_OTHER;
                break;
        }
  
        /* Set flow control settings */
-       ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+       ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
  
        switch (hw->fc.requested_mode) {
        case I40E_FC_FULL:
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    Pause);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
                break;
        case I40E_FC_TX_PAUSE:
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
                                                     Asym_Pause);
                break;
        case I40E_FC_RX_PAUSE:
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    Pause);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+               ethtool_link_ksettings_add_link_mode(ks, advertising,
                                                     Asym_Pause);
                break;
        default:
-               ethtool_convert_link_mode_to_legacy_u32(
-                       &advertising, cmd->link_modes.advertising);
-               advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-               ethtool_convert_legacy_u32_to_link_mode(
-                       cmd->link_modes.advertising, advertising);
+               ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+               ethtool_link_ksettings_del_link_mode(ks, advertising,
+                                                    Asym_Pause);
                break;
        }
  
  }
  
  /**
-  * i40e_set_settings - Set Speed and Duplex
+  * i40e_set_link_ksettings - Set Speed and Duplex
   * @netdev: network interface device structure
-  * @ecmd: ethtool command
+  * @ks: ethtool ksettings
   *
   * Set speed/duplex per media_types advertised/forced
   **/
  static int i40e_set_link_ksettings(struct net_device *netdev,
-                                  const struct ethtool_link_ksettings *cmd)
+                                  const struct ethtool_link_ksettings *ks)
  {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_aq_get_phy_abilities_resp abilities;
+       struct ethtool_link_ksettings safe_ks;
+       struct ethtool_link_ksettings copy_ks;
        struct i40e_aq_set_phy_config config;
        struct i40e_pf *pf = np->vsi->back;
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_hw *hw = &pf->hw;
-       struct ethtool_link_ksettings safe_cmd;
-       struct ethtool_link_ksettings copy_cmd;
+       bool autoneg_changed = false;
        i40e_status status = 0;
-       bool change = false;
        int timeout = 50;
        int err = 0;
-       u32 autoneg;
-       u32 advertise;
-       u32 tmp;
+       u8 autoneg;
  
        /* Changing port settings is not supported if this isn't the
         * port's controlling PF
                i40e_partition_setting_complaint(pf);
                return -EOPNOTSUPP;
        }
        if (vsi != pf->vsi[pf->lan_vsi])
                return -EOPNOTSUPP;
        if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET &&
            hw->phy.media_type != I40E_MEDIA_TYPE_FIBER &&
            hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE &&
            hw->phy.media_type != I40E_MEDIA_TYPE_DA &&
            hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
                return -EOPNOTSUPP;
        if (hw->device_id == I40E_DEV_ID_KX_B ||
            hw->device_id == I40E_DEV_ID_KX_C ||
            hw->device_id == I40E_DEV_ID_20G_KR2 ||
                return -EOPNOTSUPP;
        }
  
-       /* copy the cmd to copy_cmd to avoid modifying the origin */
-       memcpy(&copy_cmd, cmd, sizeof(struct ethtool_link_ksettings));
+       /* copy the ksettings to copy_ks to avoid modifying the origin */
+       memcpy(&copy_ks, ks, sizeof(struct ethtool_link_ksettings));
  
-       /* get our own copy of the bits to check against */
-       memset(&safe_cmd, 0, sizeof(struct ethtool_link_ksettings));
-       i40e_get_link_ksettings(netdev, &safe_cmd);
+       /* save autoneg out of ksettings */
+       autoneg = copy_ks.base.autoneg;
  
-       /* save autoneg and speed out of cmd */
-       autoneg = cmd->base.autoneg;
-       ethtool_convert_link_mode_to_legacy_u32(&advertise,
-                                               cmd->link_modes.advertising);
+       memset(&safe_ks, 0, sizeof(safe_ks));
+       /* Get link modes supported by hardware and check against modes
+        * requested by the user.  Return an error if an unsupported mode was set.
+        */
+       i40e_phy_type_to_ethtool(pf, &safe_ks);
+       if (!bitmap_subset(copy_ks.link_modes.advertising,
+                          safe_ks.link_modes.supported,
+                          __ETHTOOL_LINK_MODE_MASK_NBITS))
+               return -EINVAL;
  
-       /* set autoneg and speed back to what they currently are */
-       copy_cmd.base.autoneg = safe_cmd.base.autoneg;
-       ethtool_convert_link_mode_to_legacy_u32(
-               &tmp, safe_cmd.link_modes.advertising);
-       ethtool_convert_legacy_u32_to_link_mode(
-               copy_cmd.link_modes.advertising, tmp);
+       /* get our own copy of the bits to check against */
+       memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+       safe_ks.base.cmd = copy_ks.base.cmd;
+       safe_ks.base.link_mode_masks_nwords =
+               copy_ks.base.link_mode_masks_nwords;
+       i40e_get_link_ksettings(netdev, &safe_ks);
  
-       copy_cmd.base.cmd = safe_cmd.base.cmd;
+       /* set autoneg back to what it currently is */
+       copy_ks.base.autoneg = safe_ks.base.autoneg;
  
-       /* If copy_cmd and safe_cmd are not the same now, then they are
-        * trying to set something that we do not support
+       /* If copy_ks.base and safe_ks.base are not the same now, then they are
+        * trying to set something that we do not support.
         */
-       if (memcmp(&copy_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings)))
+       if (memcmp(&copy_ks.base, &safe_ks.base,
+                  sizeof(struct ethtool_link_settings)))
                return -EOPNOTSUPP;
  
        while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
                /* If autoneg was not already enabled */
                if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
                        /* If autoneg is not supported, return error */
-                       if (!ethtool_link_ksettings_test_link_mode(
-                                   &safe_cmd, supported, Autoneg)) {
+                       if (!ethtool_link_ksettings_test_link_mode(&safe_ks,
+                                                                  supported,
+                                                                  Autoneg)) {
                                netdev_info(netdev, "Autoneg not supported on this phy\n");
                                err = -EINVAL;
                                goto done;
                        /* Autoneg is allowed to change */
                        config.abilities = abilities.abilities |
                                           I40E_AQ_PHY_ENABLE_AN;
-                       change = true;
+                       autoneg_changed = true;
                }
        } else {
                /* If autoneg is currently enabled */
                        /* If autoneg is supported, 10GBASE_T is the only PHY
                         * that can disable it, so otherwise return an error
                         */
-                       if (ethtool_link_ksettings_test_link_mode(
-                                   &safe_cmd, supported, Autoneg) &&
+                       if (ethtool_link_ksettings_test_link_mode(&safe_ks,
+                                                                 supported,
+                                                                 Autoneg) &&
                            hw->phy.link_info.phy_type !=
                            I40E_PHY_TYPE_10GBASE_T) {
                                netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
                        /* Autoneg is allowed to change */
                        config.abilities = abilities.abilities &
                                           ~I40E_AQ_PHY_ENABLE_AN;
-                       change = true;
+                       autoneg_changed = true;
                }
        }
  
-       ethtool_convert_link_mode_to_legacy_u32(&tmp,
-                                               safe_cmd.link_modes.supported);
-       if (advertise & ~tmp) {
-               err = -EINVAL;
-               goto done;
-       }
-       if (advertise & ADVERTISED_100baseT_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 100baseT_Full))
                config.link_speed |= I40E_LINK_SPEED_100MB;
-       if (advertise & ADVERTISED_1000baseT_Full ||
-           advertise & ADVERTISED_1000baseKX_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 1000baseT_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 1000baseX_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 1000baseKX_Full))
                config.link_speed |= I40E_LINK_SPEED_1GB;
-       if (advertise & ADVERTISED_10000baseT_Full ||
-           advertise & ADVERTISED_10000baseKX4_Full ||
-           advertise & ADVERTISED_10000baseKR_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseT_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseKX4_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseKR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseCR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 10000baseSR_Full))
                config.link_speed |= I40E_LINK_SPEED_10GB;
-       if (advertise & ADVERTISED_20000baseKR2_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 20000baseKR2_Full))
                config.link_speed |= I40E_LINK_SPEED_20GB;
-       if (advertise & ADVERTISED_40000baseKR4_Full ||
-           advertise & ADVERTISED_40000baseCR4_Full ||
-           advertise & ADVERTISED_40000baseSR4_Full ||
-           advertise & ADVERTISED_40000baseLR4_Full)
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 25000baseCR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 25000baseKR_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 25000baseSR_Full))
+               config.link_speed |= I40E_LINK_SPEED_25GB;
+       if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 40000baseKR4_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 40000baseCR4_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 40000baseSR4_Full) ||
+           ethtool_link_ksettings_test_link_mode(ks, advertising,
+                                                 40000baseLR4_Full))
                config.link_speed |= I40E_LINK_SPEED_40GB;
  
        /* If speed didn't get set, set it to what it currently is.
         */
        if (!config.link_speed)
                config.link_speed = abilities.link_speed;
-       if (change || (abilities.link_speed != config.link_speed)) {
+       if (autoneg_changed || abilities.link_speed != config.link_speed) {
                /* copy over the rest of the abilities */
                config.phy_type = abilities.phy_type;
                config.phy_type_ext = abilities.phy_type_ext;
                /* make the aq call */
                status = i40e_aq_set_phy_config(hw, &config, NULL);
                if (status) {
-                       netdev_info(netdev, "Set phy config failed, err %s aq_err %s\n",
+                       netdev_info(netdev,
+                                   "Set phy config failed, err %s aq_err %s\n",
                                    i40e_stat_str(hw, status),
                                    i40e_aq_str(hw, hw->aq.asq_last_status));
                        err = -EAGAIN;
  
                status = i40e_update_link_info(hw);
                if (status)
-                       netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n",
+                       netdev_dbg(netdev,
+                                  "Updating link info failed with err %s aq_err %s\n",
                                   i40e_stat_str(hw, status),
                                   i40e_aq_str(hw, hw->aq.asq_last_status));
  
@@@ -1570,7 -1722,7 +1722,7 @@@ static void i40e_get_ethtool_stats(stru
        }
        rcu_read_lock();
        for (j = 0; j < vsi->num_queue_pairs; j++) {
 -              tx_ring = ACCESS_ONCE(vsi->tx_rings[j]);
 +              tx_ring = READ_ONCE(vsi->tx_rings[j]);
  
                if (!tx_ring)
                        continue;
@@@ -2008,7 -2160,9 +2160,9 @@@ static int i40e_set_phys_id(struct net_
                if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
                        pf->led_status = i40e_led_get(hw);
                } else {
-                       i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL);
+                       if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+                               i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL,
+                                                     NULL);
                        ret = i40e_led_get_phy(hw, &temp_status,
                                               &pf->phy_led_val);
                        pf->led_status = temp_status;
                        ret = i40e_led_set_phy(hw, false, pf->led_status,
                                               (pf->phy_led_val |
                                               I40E_PHY_LED_MODE_ORIG));
-                       i40e_aq_set_phy_debug(hw, 0, NULL);
+                       if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+                               i40e_aq_set_phy_debug(hw, 0, NULL);
                }
                break;
        default:
@@@ -2071,14 -2226,13 +2226,13 @@@ static int __i40e_get_coalesce(struct n
        ec->tx_max_coalesced_frames_irq = vsi->work_limit;
        ec->rx_max_coalesced_frames_irq = vsi->work_limit;
  
-       /* rx and tx usecs has per queue value. If user doesn't specify the queue,
-        * return queue 0's value to represent.
+       /* rx and tx usecs have a per-queue value. If the user doesn't specify
+        * the queue, return queue 0's value to represent them.
         */
-       if (queue < 0) {
+       if (queue < 0)
                queue = 0;
-       } else if (queue >= vsi->num_queue_pairs) {
+       else if (queue >= vsi->num_queue_pairs)
                return -EINVAL;
-       }
  
        rx_ring = vsi->rx_rings[queue];
        tx_ring = vsi->tx_rings[queue];
        ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
        ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
  
        /* we use the _usecs_high to store/set the interrupt rate limit
         * that the hardware supports, that almost but not quite
         * fits the original intent of the ethtool variable,
@@@ -2142,7 -2295,6 +2295,6 @@@ static int i40e_get_per_queue_coalesce(
   *
   * Change the ITR settings for a specific queue.
   **/
  static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
                                   struct ethtool_coalesce *ec,
                                   int queue)
@@@ -2264,8 -2416,8 +2416,8 @@@ static int __i40e_set_coalesce(struct n
                           vsi->int_rate_limit);
        }
  
-       /* rx and tx usecs has per queue value. If user doesn't specify the queue,
-        * apply to all queues.
+       /* rx and tx usecs have a per-queue value. If the user doesn't specify
+        * the queue, apply to all queues.
         */
        if (queue < 0) {
                for (i = 0; i < vsi->num_queue_pairs; i++)
@@@ -2647,7 -2799,7 +2799,7 @@@ static int i40e_get_rxnfc(struct net_de
  
        switch (cmd->cmd) {
        case ETHTOOL_GRXRINGS:
-               cmd->data = vsi->num_queue_pairs;
+               cmd->data = vsi->rss_size;
                ret = 0;
                break;
        case ETHTOOL_GRXFH:
@@@ -3892,6 -4044,12 +4044,12 @@@ static int i40e_set_channels(struct net
        if (vsi->type != I40E_VSI_MAIN)
                return -EINVAL;
  
+       /* We do not support setting channels via ethtool when TCs are
+        * configured through mqprio
+        */
+       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+               return -EINVAL;
        /* verify they are not requesting separate vectors */
        if (!count || ch->rx_count || ch->tx_count)
                return -EINVAL;
@@@ -3959,6 -4117,16 +4117,16 @@@ static u32 i40e_get_rxfh_indir_size(str
        return I40E_HLUT_ARRAY_SIZE;
  }
  
+ /**
+  * i40e_get_rxfh - get the rx flow hash indirection table
+  * @netdev: network interface device structure
+  * @indir: indirection table
+  * @key: hash key
+  * @hfunc: hash function
+  *
+  * Reads the indirection table directly from the hardware. Returns 0 on
+  * success.
+  **/
  static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
                         u8 *hfunc)
  {
@@@ -4090,7 -4258,7 +4258,7 @@@ static int i40e_set_priv_flags(struct n
        struct i40e_netdev_priv *np = netdev_priv(dev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
-       u64 orig_flags, new_flags, changed_flags;
+       u32 orig_flags, new_flags, changed_flags;
        u32 i, j;
  
        orig_flags = READ_ONCE(pf->flags);
@@@ -4142,12 -4310,12 +4310,12 @@@ flags_complete
                return -EOPNOTSUPP;
  
        /* Compare and exchange the new flags into place. If we failed, that
-        * is if cmpxchg64 returns anything but the old value, this means that
+        * is, if cmpxchg returns anything but the old value, this means that
         * something else has modified the flags variable since we copied it
         * originally. We'll just punt with an error and log something in the
         * message buffer.
         */
-       if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
+       if (cmpxchg(&pf->flags, orig_flags, new_flags) != orig_flags) {
                dev_warn(&pf->pdev->dev,
                         "Unable to update pf->flags as it was modified by another thread...\n");
                return -EAGAIN;
                        sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
                valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
                ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags,
-                                               NULL);
+                                               0, NULL);
                if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
                        dev_info(&pf->pdev->dev,
                                 "couldn't set switch config bits, err %s aq_err %s\n",
        /* Issue reset to cause things to take effect, as additional bits
         * are added we will need to create a mask of bits requiring reset
         */
-       if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) ||
-           ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev)))
+       if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
+                            I40E_FLAG_LEGACY_RX |
+                            I40E_FLAG_SOURCE_PRUNING_DISABLED))
                i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
  
        return 0;
  }
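
The priv-flags update above keeps the optimistic read-modify-write pattern: copy the flags, compute the new value, and give up with -EAGAIN if the compare-and-exchange shows another writer changed them in the meantime (only the width changed, cmpxchg64 to cmpxchg, since pf->flags is now u32). A userspace analogue of the same pattern using C11 atomics rather than the kernel cmpxchg():

/* sketch_flags_cmpxchg.c -- illustrative only, not part of this commit */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_LEGACY_RX (1u << 0)	/* invented flag bit */

static _Atomic uint32_t pf_flags;

static int set_priv_flag(uint32_t flag)
{
	uint32_t orig = atomic_load(&pf_flags);
	uint32_t new = orig | flag;

	/* succeeds only if pf_flags still holds the value we copied;
	 * otherwise another writer got in first and we punt, as the
	 * driver does with -EAGAIN
	 */
	if (!atomic_compare_exchange_strong(&pf_flags, &orig, new))
		return -1;
	return 0;
}

int main(void)
{
	if (set_priv_flag(FLAG_LEGACY_RX))
		fprintf(stderr, "flags changed under us, try again\n");
	printf("flags=0x%x\n", atomic_load(&pf_flags));	/* prints flags=0x1 */
	return 0;
}
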
  
+ /**
+  * i40e_get_module_info - get (Q)SFP+ module type info
+  * @netdev: network interface device structure
+  * @modinfo: module EEPROM size and layout information structure
+  **/
+ static int i40e_get_module_info(struct net_device *netdev,
+                               struct ethtool_modinfo *modinfo)
+ {
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u32 sff8472_comp = 0;
+       u32 sff8472_swap = 0;
+       u32 sff8636_rev = 0;
+       i40e_status status;
+       u32 type = 0;
+       /* Check if firmware supports reading module EEPROM. */
+       if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) {
+               netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n");
+               return -EINVAL;
+       }
+       status = i40e_update_link_info(hw);
+       if (status)
+               return -EIO;
+       if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) {
+               netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n");
+               return -EINVAL;
+       }
+       type = hw->phy.link_info.module_type[0];
+       switch (type) {
+       case I40E_MODULE_TYPE_SFP:
+               status = i40e_aq_get_phy_register(hw,
+                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+                               I40E_I2C_EEPROM_DEV_ADDR,
+                               I40E_MODULE_SFF_8472_COMP,
+                               &sff8472_comp, NULL);
+               if (status)
+                       return -EIO;
+               status = i40e_aq_get_phy_register(hw,
+                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+                               I40E_I2C_EEPROM_DEV_ADDR,
+                               I40E_MODULE_SFF_8472_SWAP,
+                               &sff8472_swap, NULL);
+               if (status)
+                       return -EIO;
+               /* Check if the module requires address swap to access
+                * the other EEPROM memory page.
+                */
+               if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) {
+                       netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n");
+                       modinfo->type = ETH_MODULE_SFF_8079;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+               } else if (sff8472_comp == 0x00) {
+                       /* Module is not SFF-8472 compliant */
+                       modinfo->type = ETH_MODULE_SFF_8079;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+               } else {
+                       modinfo->type = ETH_MODULE_SFF_8472;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+               }
+               break;
+       case I40E_MODULE_TYPE_QSFP_PLUS:
+               /* Read from memory page 0. */
+               status = i40e_aq_get_phy_register(hw,
+                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+                               0,
+                               I40E_MODULE_REVISION_ADDR,
+                               &sff8636_rev, NULL);
+               if (status)
+                       return -EIO;
+               /* Determine revision compliance byte */
+               if (sff8636_rev > 0x02) {
+                       /* Module is SFF-8636 compliant */
+                       modinfo->type = ETH_MODULE_SFF_8636;
+                       modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+               } else {
+                       modinfo->type = ETH_MODULE_SFF_8436;
+                       modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+               }
+               break;
+       case I40E_MODULE_TYPE_QSFP28:
+               modinfo->type = ETH_MODULE_SFF_8636;
+               modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+               break;
+       default:
+               netdev_err(vsi->netdev, "Module type unrecognized\n");
+               return -EINVAL;
+       }
+       return 0;
+ }
+ 
+ /**
+  * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents
+  * @netdev: network interface device structure
+  * @ee: EEPROM dump request structure
+  * @data: buffer to be filled with EEPROM contents
+  **/
+ static int i40e_get_module_eeprom(struct net_device *netdev,
+                                 struct ethtool_eeprom *ee,
+                                 u8 *data)
+ {
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       bool is_sfp = false;
+       i40e_status status;
+       u32 value = 0;
+       int i;
+       if (!ee || !ee->len || !data)
+               return -EINVAL;
+       if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP)
+               is_sfp = true;
+       for (i = 0; i < ee->len; i++) {
+               u32 offset = i + ee->offset;
+               u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0;
+               /* Check if we need to access the other memory page */
+               if (is_sfp) {
+                       if (offset >= ETH_MODULE_SFF_8079_LEN) {
+                               offset -= ETH_MODULE_SFF_8079_LEN;
+                               addr = I40E_I2C_EEPROM_DEV_ADDR2;
+                       }
+               } else {
+                       while (offset >= ETH_MODULE_SFF_8436_LEN) {
+                               /* Compute memory page number and offset. */
+                               offset -= ETH_MODULE_SFF_8436_LEN / 2;
+                               addr++;
+                       }
+               }
+               status = i40e_aq_get_phy_register(hw,
+                               I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+                               addr, offset, &value, NULL);
+               if (status)
+                       return -EIO;
+               data[i] = value;
+       }
+       return 0;
+ }
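
The read loop in i40e_get_module_eeprom() turns the flat ethtool offset into a device address plus in-page offset: SFP modules move from the A0h to the A2h I2C address after the first ETH_MODULE_SFF_8079_LEN bytes, while QSFP pages advance in half-page steps. A standalone sketch of just that mapping; the lengths and addresses are hard-coded to the conventional 256-byte/0xA0/0xA2 values rather than taken from the kernel headers:

/* sketch_module_eeprom.c -- illustrative only, not part of this commit */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SFF_8079_LEN   256	/* size of the SFP A0h page (assumed) */
#define SFF_8436_LEN   256	/* QSFP lower+upper page window (assumed) */
#define EEPROM_ADDR_A0 0xA0
#define EEPROM_ADDR_A2 0xA2

/* mirrors the per-byte address computation in the loop above */
static void map_offset(bool is_sfp, uint32_t flat, uint32_t *addr, uint32_t *offset)
{
	*offset = flat;
	*addr = is_sfp ? EEPROM_ADDR_A0 : 0;

	if (is_sfp) {
		if (*offset >= SFF_8079_LEN) {	/* bytes 256.. live at 0xA2 */
			*offset -= SFF_8079_LEN;
			*addr = EEPROM_ADDR_A2;
		}
	} else {
		while (*offset >= SFF_8436_LEN) {	/* advance QSFP page */
			*offset -= SFF_8436_LEN / 2;
			(*addr)++;
		}
	}
}

int main(void)
{
	uint32_t addr, off;

	map_offset(true, 300, &addr, &off);
	printf("SFP  byte 300 -> addr 0x%x offset %u\n", addr, off);	/* 0xa2, 44 */

	map_offset(false, 300, &addr, &off);
	printf("QSFP byte 300 -> page %u offset %u\n", addr, off);	/* 1, 172 */
	return 0;
}
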
  static const struct ethtool_ops i40e_ethtool_ops = {
        .get_drvinfo            = i40e_get_drvinfo,
        .get_regs_len           = i40e_get_regs_len,
        .set_rxfh               = i40e_set_rxfh,
        .get_channels           = i40e_get_channels,
        .set_channels           = i40e_set_channels,
+       .get_module_info        = i40e_get_module_info,
+       .get_module_eeprom      = i40e_get_module_eeprom,
        .get_ts_info            = i40e_get_ts_info,
        .get_priv_flags         = i40e_get_priv_flags,
        .set_priv_flags         = i40e_set_priv_flags,
index de1fcac7834de30173109b38001554b267e697d3,17e6f64299cf94747561fa5fb260faf967a18004..4a964d6e4a9ebcdb7b55b157bb9b6006a5fd2aa8
@@@ -69,6 -69,15 +69,15 @@@ static int i40e_reset(struct i40e_pf *p
  static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
  static void i40e_fdir_sb_setup(struct i40e_pf *pf);
  static int i40e_veb_get_bw_info(struct i40e_veb *veb);
+ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+                                    struct i40e_cloud_filter *filter,
+                                    bool add);
+ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+                                            struct i40e_cloud_filter *filter,
+                                            bool add);
+ static int i40e_get_capabilities(struct i40e_pf *pf,
+                                enum i40e_admin_queue_opc list_type);
  
  /* i40e_pci_tbl - PCI Device ID Table
   *
@@@ -455,7 -464,7 +464,7 @@@ static void i40e_get_netdev_stats_struc
                u64 bytes, packets;
                unsigned int start;
  
 -              tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
 +              tx_ring = READ_ONCE(vsi->tx_rings[i]);
                if (!tx_ring)
                        continue;
                i40e_get_netdev_stats_struct_tx(tx_ring, stats);
@@@ -599,6 -608,20 +608,20 @@@ static void i40e_stat_update32(struct i
                *stat = (u32)((new_data + BIT_ULL(32)) - *offset);
  }
  
+ /**
+  * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
+  * @hw: ptr to the hardware info
+  * @reg: the hw reg to read and clear
+  * @stat: ptr to the stat
+  **/
+ static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
+ {
+       u32 new_data = rd32(hw, reg);
+       wr32(hw, reg, 1); /* must write a nonzero value to clear register */
+       *stat += new_data;
+ }
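
The new i40e_stat_update_and_clear32() helper relies on the FDIR match counters being cleared when any nonzero value is written back, so each poll can simply be accumulated into the 64-bit software counter instead of tracking wrap offsets the way i40e_stat_update32() does. A toy simulation of that read-and-clear accumulation:

/* sketch_stat_clear.c -- illustrative only, not part of this commit */
#include <stdint.h>
#include <stdio.h>

/* fake 32-bit hardware counter that clears when a nonzero value is written */
static uint32_t fake_reg;

static uint32_t rd32(void)       { return fake_reg; }
static void     wr32(uint32_t v) { if (v) fake_reg = 0; }

/* mirrors i40e_stat_update_and_clear32(): read, clear, accumulate */
static void stat_update_and_clear32(uint64_t *stat)
{
	uint32_t new_data = rd32();

	wr32(1);		/* nonzero write clears the counter */
	*stat += new_data;
}

int main(void)
{
	uint64_t fd_atr_match = 0;

	fake_reg = 100;		/* 100 matches since the last poll */
	stat_update_and_clear32(&fd_atr_match);

	fake_reg = 7;		/* 7 more before the next poll */
	stat_update_and_clear32(&fd_atr_match);

	printf("total = %llu\n", (unsigned long long)fd_atr_match);	/* 107 */
	return 0;
}
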
  /**
   * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
   * @vsi: the VSI to be updated
@@@ -791,7 -814,7 +814,7 @@@ static void i40e_update_vsi_stats(struc
        rcu_read_lock();
        for (q = 0; q < vsi->num_queue_pairs; q++) {
                /* locate Tx ring */
 -              p = ACCESS_ONCE(vsi->tx_rings[q]);
 +              p = READ_ONCE(vsi->tx_rings[q]);
  
                do {
                        start = u64_stats_fetch_begin_irq(&p->syncp);
@@@ -1040,18 -1063,15 +1063,15 @@@ static void i40e_update_pf_stats(struc
                           &osd->rx_jabber, &nsd->rx_jabber);
  
        /* FDIR stats */
-       i40e_stat_update32(hw,
-                          I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)),
-                          pf->stat_offsets_loaded,
-                          &osd->fd_atr_match, &nsd->fd_atr_match);
-       i40e_stat_update32(hw,
-                          I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)),
-                          pf->stat_offsets_loaded,
-                          &osd->fd_sb_match, &nsd->fd_sb_match);
-       i40e_stat_update32(hw,
-                     I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)),
-                     pf->stat_offsets_loaded,
-                     &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match);
+       i40e_stat_update_and_clear32(hw,
+                       I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)),
+                       &nsd->fd_atr_match);
+       i40e_stat_update_and_clear32(hw,
+                       I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)),
+                       &nsd->fd_sb_match);
+       i40e_stat_update_and_clear32(hw,
+                       I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)),
+                       &nsd->fd_atr_tunnel_match);
  
        val = rd32(hw, I40E_PRTPM_EEE_STAT);
        nsd->tx_lpi_status =
@@@ -1577,6 -1597,170 +1597,170 @@@ static int i40e_set_mac(struct net_devi
        return 0;
  }
  
+ /**
+  * i40e_config_rss_aq - Prepare for RSS using AQ commands
+  * @vsi: vsi structure
+  * @seed: RSS hash seed
+  * @lut: pointer to the lookup table
+  * @lut_size: size of the lookup table
+  **/
+ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+                             u8 *lut, u16 lut_size)
+ {
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       int ret = 0;
+       if (seed) {
+               struct i40e_aqc_get_set_rss_key_data *seed_dw =
+                       (struct i40e_aqc_get_set_rss_key_data *)seed;
+               ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Cannot set RSS key, err %s aq_err %s\n",
+                                i40e_stat_str(hw, ret),
+                                i40e_aq_str(hw, hw->aq.asq_last_status));
+                       return ret;
+               }
+       }
+       if (lut) {
+               bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
+               ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Cannot set RSS lut, err %s aq_err %s\n",
+                                i40e_stat_str(hw, ret),
+                                i40e_aq_str(hw, hw->aq.asq_last_status));
+                       return ret;
+               }
+       }
+       return ret;
+ }
+ 
+ /**
+  * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
+  * @vsi: VSI structure
+  **/
+ static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
+ {
+       struct i40e_pf *pf = vsi->back;
+       u8 seed[I40E_HKEY_ARRAY_SIZE];
+       u8 *lut;
+       int ret;
+       if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
+               return 0;
+       if (!vsi->rss_size)
+               vsi->rss_size = min_t(int, pf->alloc_rss_size,
+                                     vsi->num_queue_pairs);
+       if (!vsi->rss_size)
+               return -EINVAL;
+       lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+       if (!lut)
+               return -ENOMEM;
+       /* Use the user configured hash keys and lookup table if there is one,
+        * otherwise use default
+        */
+       if (vsi->rss_lut_user)
+               memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+       else
+               i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+       if (vsi->rss_hkey_user)
+               memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+       else
+               netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+       ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+       kfree(lut);
+       return ret;
+ }
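As a side note, the default LUT produced by i40e_fill_rss_lut() spreads lookup-table entries evenly over the active queues. A minimal sketch of that behaviour (an illustration only, assuming the usual round-robin fill; not code from this commit):

	/* Illustration only: map hash bucket i to queue (i % rss_size),
	 * spreading traffic evenly across rss_size queues.
	 */
	static void example_fill_rss_lut(u8 *lut, u16 lut_size, u16 rss_size)
	{
		u16 i;

		for (i = 0; i < lut_size; i++)
			lut[i] = i % rss_size;
	}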
+ /**
+  * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
+  * @vsi: the VSI being configured
+  * @ctxt: VSI context structure
+  * @enabled_tc: bitmap of traffic classes to enable
+  *
+  * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+  **/
+ static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
+                                          struct i40e_vsi_context *ctxt,
+                                          u8 enabled_tc)
+ {
+       u16 qcount = 0, max_qcount, qmap, sections = 0;
+       int i, override_q, pow, num_qps, ret;
+       u8 netdev_tc = 0, offset = 0;
+       if (vsi->type != I40E_VSI_MAIN)
+               return -EINVAL;
+       sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+       sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+       vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
+       vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+       num_qps = vsi->mqprio_qopt.qopt.count[0];
+       /* find the next higher power-of-2 of num queue pairs */
+       pow = ilog2(num_qps);
+       if (!is_power_of_2(num_qps))
+               pow++;
+       qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+               (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+       /* Setup queue offset/count for all TCs for given VSI */
+       max_qcount = vsi->mqprio_qopt.qopt.count[0];
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               /* See if the given TC is enabled for the given VSI */
+               if (vsi->tc_config.enabled_tc & BIT(i)) {
+                       offset = vsi->mqprio_qopt.qopt.offset[i];
+                       qcount = vsi->mqprio_qopt.qopt.count[i];
+                       if (qcount > max_qcount)
+                               max_qcount = qcount;
+                       vsi->tc_config.tc_info[i].qoffset = offset;
+                       vsi->tc_config.tc_info[i].qcount = qcount;
+                       vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
+               } else {
+                       /* TC is not enabled so set the offset to
+                        * default queue and allocate one queue
+                        * for the given TC.
+                        */
+                       vsi->tc_config.tc_info[i].qoffset = 0;
+                       vsi->tc_config.tc_info[i].qcount = 1;
+                       vsi->tc_config.tc_info[i].netdev_tc = 0;
+               }
+       }
+       /* Set actual Tx/Rx queue pairs */
+       vsi->num_queue_pairs = offset + qcount;
+       /* Setup queue TC[0].qmap for given VSI context */
+       ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+       ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+       ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+       ctxt->info.valid_sections |= cpu_to_le16(sections);
+       /* Reconfigure RSS for main VSI with max queue count */
+       vsi->rss_size = max_qcount;
+       ret = i40e_vsi_config_rss(vsi);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed to reconfig rss for num_queues (%u)\n",
+                        max_qcount);
+               return ret;
+       }
+       vsi->reconfig_rss = true;
+       dev_dbg(&vsi->back->pdev->dev,
+               "Reconfigured rss with num_queues (%u)\n", max_qcount);
+       /* Find queue count available for channel VSIs and starting offset
+        * for channel VSIs
+        */
+       override_q = vsi->mqprio_qopt.qopt.count[0];
+       if (override_q && override_q < vsi->num_queue_pairs) {
+               vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
+               vsi->next_base_queue = override_q;
+       }
+       return 0;
+ }
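The TC queue map encodes the queue count as a power-of-two exponent, which is why num_qps is rounded up above. A hypothetical worked example of that rounding (illustration only, not code from this commit):

	/* Hypothetical values illustrating the rounding above */
	int num_qps = 6;
	int pow = ilog2(num_qps);        /* ilog2(6) = 2                       */

	if (!is_power_of_2(num_qps))
		pow++;                   /* 6 is not a power of two -> pow = 3 */
	/* the TC is therefore mapped over 2^3 = 8 queue slots in qmap */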
  /**
   * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
   * @vsi: the VSI being setup
@@@ -1615,7 -1799,7 +1799,7 @@@ static void i40e_vsi_setup_queue_map(st
                        numtc = 1;
                }
        } else {
-               /* At least TC0 is enabled in case of non-DCB case */
+               /* At least TC0 is enabled in non-DCB, non-MQPRIO case */
                numtc = 1;
        }
  
@@@ -1765,11 -1949,6 +1949,6 @@@ static void i40e_set_rx_mode(struct net
                vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
                vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
        }
-       /* schedule our worker thread which will take care of
-        * applying the new filter changes
-        */
-       i40e_service_event_schedule(vsi->back);
  }
  
  /**
@@@ -2873,22 -3052,18 +3052,18 @@@ static void i40e_vsi_free_rx_resources(
   **/
  static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
  {
-       struct i40e_vsi *vsi = ring->vsi;
+       int cpu;
  
-       if (!ring->q_vector || !ring->netdev)
+       if (!ring->q_vector || !ring->netdev || ring->ch)
                return;
  
-       if ((vsi->tc_config.numtc <= 1) &&
-           !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) {
-               netif_set_xps_queue(ring->netdev,
-                                   get_cpu_mask(ring->q_vector->v_idx),
-                                   ring->queue_index);
-       }
+       /* We only initialize XPS once, so as not to overwrite user settings */
+       if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
+               return;
  
-       /* schedule our worker thread which will take care of
-        * applying the new filter changes
-        */
-       i40e_service_event_schedule(vsi->back);
+       cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
+       netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
+                           ring->queue_index);
  }
  
  /**
@@@ -2942,7 -3117,14 +3117,14 @@@ static int i40e_configure_tx_ring(struc
         * initialization. This has to be done regardless of
         * DCB as by default everything is mapped to TC0.
         */
-       tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+       if (ring->ch)
+               tx_ctx.rdylist =
+                       le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
+       else
+               tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
        tx_ctx.rdylist_act = 0;
  
        /* clear the context in the HMC */
        }
  
        /* Now associate this queue with this PCI function */
-       if (vsi->type == I40E_VSI_VMDQ2) {
-               qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
-               qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
-                          I40E_QTX_CTL_VFVM_INDX_MASK;
+       if (ring->ch) {
+               if (ring->ch->type == I40E_VSI_VMDQ2)
+                       qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+               else
+                       return -EINVAL;
+               qtx_ctl |= (ring->ch->vsi_number <<
+                           I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+                           I40E_QTX_CTL_VFVM_INDX_MASK;
        } else {
-               qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+               if (vsi->type == I40E_VSI_VMDQ2) {
+                       qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+                       qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+                                   I40E_QTX_CTL_VFVM_INDX_MASK;
+               } else {
+                       qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+               }
        }
  
        qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
@@@ -2998,7 -3191,7 +3191,7 @@@ static int i40e_configure_rx_ring(struc
        struct i40e_hmc_obj_rxq rx_ctx;
        i40e_status err = 0;
  
-       ring->state = 0;
+       bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
  
        /* clear the context structure first */
        memset(&rx_ctx, 0, sizeof(rx_ctx));
        if (hw->revision_id == 0)
                rx_ctx.lrxqthresh = 0;
        else
-               rx_ctx.lrxqthresh = 2;
+               rx_ctx.lrxqthresh = 1;
        rx_ctx.crcstrip = 1;
        rx_ctx.l2tsel = 1;
        /* this controls whether VLAN is stripped from inner headers */
@@@ -3138,6 -3331,7 +3331,7 @@@ static void i40e_vsi_config_dcb_rings(s
                        rx_ring->dcb_tc = 0;
                        tx_ring->dcb_tc = 0;
                }
+               return;
        }
  
        for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
@@@ -3396,15 -3590,14 +3590,14 @@@ void i40e_irq_dynamic_disable_icr0(stru
  /**
   * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
   * @pf: board private structure
-  * @clearpba: true when all pending interrupt events should be cleared
   **/
- void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba)
+ void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
  {
        struct i40e_hw *hw = &pf->hw;
        u32 val;
  
        val = I40E_PFINT_DYN_CTL0_INTENA_MASK   |
-             (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) |
+             I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
              (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
  
        wr32(hw, I40E_PFINT_DYN_CTL0, val);
@@@ -3471,6 -3664,7 +3664,7 @@@ static int i40e_vsi_request_irq_msix(st
        int tx_int_idx = 0;
        int vector, err;
        int irq_num;
+       int cpu;
  
        for (vector = 0; vector < q_vectors; vector++) {
                struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
                q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
                q_vector->affinity_notify.release = i40e_irq_affinity_release;
                irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
-               /* get_cpu_mask returns a static constant mask with
-                * a permanent lifetime so it's ok to use here.
+               /* Spread affinity hints out across online CPUs.
+                *
+                * get_cpu_mask returns a static constant mask with
+                * a permanent lifetime so it's ok to pass to
+                * irq_set_affinity_hint without making a copy.
                 */
-               irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
+               cpu = cpumask_local_spread(q_vector->v_idx, -1);
+               irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
        }
  
        vsi->irqs_ready = true;
@@@ -3585,7 -3783,7 +3783,7 @@@ static int i40e_vsi_enable_irq(struct i
                for (i = 0; i < vsi->num_q_vectors; i++)
                        i40e_irq_dynamic_enable(vsi, i);
        } else {
-               i40e_irq_dynamic_enable_icr0(pf, true);
+               i40e_irq_dynamic_enable_icr0(pf);
        }
  
        i40e_flush(&pf->hw);
  }
  
  /**
-  * i40e_stop_misc_vector - Stop the vector that handles non-queue events
+  * i40e_free_misc_vector - Free the vector that handles non-queue events
   * @pf: board private structure
   **/
- static void i40e_stop_misc_vector(struct i40e_pf *pf)
+ static void i40e_free_misc_vector(struct i40e_pf *pf)
  {
        /* Disable ICR 0 */
        wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
        i40e_flush(&pf->hw);
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
+               synchronize_irq(pf->msix_entries[0].vector);
+               free_irq(pf->msix_entries[0].vector, pf);
+               clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
+       }
  }
  
  /**
@@@ -3728,7 -3932,7 +3932,7 @@@ enable_intr
        wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
        if (!test_bit(__I40E_DOWN, pf->state)) {
                i40e_service_event_schedule(pf);
-               i40e_irq_dynamic_enable_icr0(pf, false);
+               i40e_irq_dynamic_enable_icr0(pf);
        }
  
        return ret;
@@@ -4455,11 -4659,7 +4659,7 @@@ static void i40e_clear_interrupt_scheme
  {
        int i;
  
-       i40e_stop_misc_vector(pf);
-       if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
-               synchronize_irq(pf->msix_entries[0].vector);
-               free_irq(pf->msix_entries[0].vector, pf);
-       }
+       i40e_free_misc_vector(pf);
  
        i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
                      I40E_IWARP_IRQ_PILE_ID);
@@@ -4847,6 -5047,24 +5047,24 @@@ static u8 i40e_dcb_get_enabled_tc(struc
        return enabled_tc;
  }
  
+ /**
+  * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
+  * @pf: PF being queried
+  *
+  * Query the current MQPRIO configuration and return the number of
+  * traffic classes enabled.
+  **/
+ static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
+ {
+       struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+       u8 num_tc = vsi->mqprio_qopt.qopt.num_tc;
+       u8 enabled_tc = 1, i;
+       for (i = 1; i < num_tc; i++)
+               enabled_tc |= BIT(i);
+       return enabled_tc;
+ }
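Note that the value returned here is a TC bitmap rather than a count: bit 0 (TC0) is always set and each additional TC sets the next bit, so for example num_tc = 3 yields enabled_tc = BIT(0) | BIT(1) | BIT(2) = 0x7.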
  /**
   * i40e_pf_get_num_tc - Get enabled traffic classes for PF
   * @pf: PF being queried
@@@ -4860,7 -5078,10 +5078,10 @@@ static u8 i40e_pf_get_num_tc(struct i40
        u8 num_tc = 0;
        struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
  
-       /* If DCB is not enabled then always in single TC */
+       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+               return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
+       /* If neither MQPRIO nor DCB is enabled, then always use single TC */
        if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
                return 1;
  
   **/
  static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
  {
-       /* If DCB is not enabled for this PF then just return default TC */
+       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+               return i40e_mqprio_get_enabled_tc(pf);
+       /* If neither MQPRIO nor DCB is enabled for this PF then just return
+        * default TC
+        */
        if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
                return I40E_DEFAULT_TRAFFIC_CLASS;
  
@@@ -4979,6 -5205,16 +5205,16 @@@ static int i40e_vsi_configure_bw_alloc(
        i40e_status ret;
        int i;
  
+       if (vsi->back->flags & I40E_FLAG_TC_MQPRIO)
+               return 0;
+       if (!vsi->mqprio_qopt.qopt.hw) {
+               ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+               if (ret)
+                       dev_info(&vsi->back->pdev->dev,
+                                "Failed to reset tx rate for vsi->seid %u\n",
+                                vsi->seid);
+               return ret;
+       }
        bw_data.tc_valid_bits = enabled_tc;
        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
                bw_data.tc_bw_credits[i] = bw_share[i];
@@@ -5041,6 -5277,9 +5277,9 @@@ static void i40e_vsi_config_netdev_tc(s
                                        vsi->tc_config.tc_info[i].qoffset);
        }
  
+       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+               return;
        /* Assign UP2TC map for the VSI */
        for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
                /* Get the actual TC# for the UP */
@@@ -5091,7 -5330,8 +5330,8 @@@ static int i40e_vsi_config_tc(struct i4
        int i;
  
        /* Check if enabled_tc is same as existing or new TCs */
-       if (vsi->tc_config.enabled_tc == enabled_tc)
+       if (vsi->tc_config.enabled_tc == enabled_tc &&
+           vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
                return ret;
  
        /* Enable ETS TCs with equal BW Share for now across all VSIs */
        ctxt.vf_num = 0;
        ctxt.uplink_seid = vsi->uplink_seid;
        ctxt.info = vsi->info;
-       i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+       if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) {
+               ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
+               if (ret)
+                       goto out;
+       } else {
+               i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+       }
  
+       /* On destroying the qdisc, reset vsi->rss_size, as number of enabled
+        * queues changed.
+        */
+       if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
+               vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
+                                     vsi->num_queue_pairs);
+               ret = i40e_vsi_config_rss(vsi);
+               if (ret) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "Failed to reconfig rss for num_queues\n");
+                       return ret;
+               }
+               vsi->reconfig_rss = false;
+       }
        if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
                ctxt.info.valid_sections |=
                                cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
                ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
        }
  
-       /* Update the VSI after updating the VSI queue-mapping information */
+       /* Update the VSI after updating the VSI queue-mapping
+        * information
+        */
        ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
        if (ret) {
                dev_info(&vsi->back->pdev->dev,
  }
  
  /**
-  * i40e_veb_config_tc - Configure TCs for given VEB
-  * @veb: given VEB
-  * @enabled_tc: TC bitmap
+  * i40e_get_link_speed - Returns link speed for the interface
+  * @vsi: VSI to be configured
   *
-  * Configures given TC bitmap for VEB (switching) element
   **/
- int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+ int i40e_get_link_speed(struct i40e_vsi *vsi)
  {
-       struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
-       struct i40e_pf *pf = veb->pf;
-       int ret = 0;
-       int i;
-       /* No TCs or already enabled TCs just return */
-       if (!enabled_tc || veb->enabled_tc == enabled_tc)
-               return ret;
-       bw_data.tc_valid_bits = enabled_tc;
-       /* bw_data.absolute_credits is not set (relative) */
-       /* Enable ETS TCs with equal BW Share for now */
-       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-               if (enabled_tc & BIT(i))
-                       bw_data.tc_bw_share_credits[i] = 1;
-       }
-       ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
-                                                  &bw_data, NULL);
-       if (ret) {
-               dev_info(&pf->pdev->dev,
-                        "VEB bw config failed, err %s aq_err %s\n",
-                        i40e_stat_str(&pf->hw, ret),
-                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-               goto out;
-       }
+       struct i40e_pf *pf = vsi->back;
  
-       /* Update the BW information */
-       ret = i40e_veb_get_bw_info(veb);
-       if (ret) {
-               dev_info(&pf->pdev->dev,
-                        "Failed getting veb bw config, err %s aq_err %s\n",
-                        i40e_stat_str(&pf->hw, ret),
-                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+       switch (pf->hw.phy.link_info.link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               return 40000;
+       case I40E_LINK_SPEED_25GB:
+               return 25000;
+       case I40E_LINK_SPEED_20GB:
+               return 20000;
+       case I40E_LINK_SPEED_10GB:
+               return 10000;
+       case I40E_LINK_SPEED_1GB:
+               return 1000;
+       default:
+               return -EINVAL;
        }
- out:
-       return ret;
  }
  
- #ifdef CONFIG_I40E_DCB
  /**
-  * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
-  * @pf: PF struct
+  * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+  * @vsi: VSI to be configured
+  * @seid: seid of the channel/VSI
+  * @max_tx_rate: max TX rate to be configured as BW limit
   *
-  * Reconfigure VEB/VSIs on a given PF; it is assumed that
-  * the caller would've quiesce all the VSIs before calling
-  * this function
+  * Helper function to set BW limit for a given VSI
   **/
- static void i40e_dcb_reconfigure(struct i40e_pf *pf)
+ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
  {
-       u8 tc_map = 0;
-       int ret;
-       u8 v;
+       struct i40e_pf *pf = vsi->back;
+       u64 credits = 0;
+       int speed = 0;
+       int ret = 0;
  
-       /* Enable the TCs available on PF to all VEBs */
-       tc_map = i40e_pf_get_tc_map(pf);
-       for (v = 0; v < I40E_MAX_VEB; v++) {
-               if (!pf->veb[v])
+       speed = i40e_get_link_speed(vsi);
+       if (max_tx_rate > speed) {
+               dev_err(&pf->pdev->dev,
+                       "Invalid max tx rate %llu specified for VSI seid %d.",
+                       max_tx_rate, seid);
+               return -EINVAL;
+       }
+       if (max_tx_rate && max_tx_rate < 50) {
+               dev_warn(&pf->pdev->dev,
+                        "Setting max tx rate to minimum usable value of 50Mbps.\n");
+               max_tx_rate = 50;
+       }
+       /* Tx rate credits are in values of 50Mbps, 0 is disabled */
+       credits = max_tx_rate;
+       do_div(credits, I40E_BW_CREDIT_DIVISOR);
+       ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
+                                         I40E_MAX_BW_INACTIVE_ACCUM, NULL);
+       if (ret)
+               dev_err(&pf->pdev->dev,
+                       "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n",
+                       max_tx_rate, seid, i40e_stat_str(&pf->hw, ret),
+                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+       return ret;
+ }
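Since the admin queue expresses the limit in 50 Mbps credits (the I40E_BW_CREDIT_DIVISOR conversion noted in the comment above), a hypothetical worked example of that conversion:

	/* Hypothetical example of the credit conversion done above */
	u64 credits = 975;                        /* requested 975 Mbps        */

	do_div(credits, I40E_BW_CREDIT_DIVISOR);  /* 975 / 50 = 19 credits     */
	/* i.e. the hardware enforces roughly 19 * 50 = 950 Mbps */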
+ /**
+  * i40e_remove_queue_channels - Remove queue channels for the TCs
+  * @vsi: VSI to be configured
+  *
+  * Remove queue channels for the TCs
+  **/
+ static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
+ {
+       enum i40e_admin_queue_err last_aq_status;
+       struct i40e_cloud_filter *cfilter;
+       struct i40e_channel *ch, *ch_tmp;
+       struct i40e_pf *pf = vsi->back;
+       struct hlist_node *node;
+       int ret, i;
+       /* Reset rss size that was stored when reconfiguring rss for
+        * channel VSIs with non-power-of-2 queue count.
+        */
+       vsi->current_rss_size = 0;
+       /* perform cleanup for channels if they exist */
+       if (list_empty(&vsi->ch_list))
+               return;
+       list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+               struct i40e_vsi *p_vsi;
+               list_del(&ch->list);
+               p_vsi = ch->parent_vsi;
+               if (!p_vsi || !ch->initialized) {
+                       kfree(ch);
                        continue;
-               ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+               }
+               /* Reset queue contexts */
+               for (i = 0; i < ch->num_queue_pairs; i++) {
+                       struct i40e_ring *tx_ring, *rx_ring;
+                       u16 pf_q;
+                       pf_q = ch->base_queue + i;
+                       tx_ring = vsi->tx_rings[pf_q];
+                       tx_ring->ch = NULL;
+                       rx_ring = vsi->rx_rings[pf_q];
+                       rx_ring->ch = NULL;
+               }
+               /* Reset BW configured for this VSI via mqprio */
+               ret = i40e_set_bw_limit(vsi, ch->seid, 0);
+               if (ret)
+                       dev_info(&vsi->back->pdev->dev,
+                                "Failed to reset tx rate for ch->seid %u\n",
+                                ch->seid);
+               /* delete cloud filters associated with this channel */
+               hlist_for_each_entry_safe(cfilter, node,
+                                         &pf->cloud_filter_list, cloud_node) {
+                       if (cfilter->seid != ch->seid)
+                               continue;
+                       hash_del(&cfilter->cloud_node);
+                       if (cfilter->dst_port)
+                               ret = i40e_add_del_cloud_filter_big_buf(vsi,
+                                                                       cfilter,
+                                                                       false);
+                       else
+                               ret = i40e_add_del_cloud_filter(vsi, cfilter,
+                                                               false);
+                       last_aq_status = pf->hw.aq.asq_last_status;
+                       if (ret)
+                               dev_info(&pf->pdev->dev,
+                                        "Failed to delete cloud filter, err %s aq_err %s\n",
+                                        i40e_stat_str(&pf->hw, ret),
+                                        i40e_aq_str(&pf->hw, last_aq_status));
+                       kfree(cfilter);
+               }
+               /* delete VSI from FW */
+               ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+                                            NULL);
+               if (ret)
+                       dev_err(&vsi->back->pdev->dev,
+                               "unable to remove channel (%d) for parent VSI(%d)\n",
+                               ch->seid, p_vsi->seid);
+               kfree(ch);
+       }
+       INIT_LIST_HEAD(&vsi->ch_list);
+ }
+ /**
+  * i40e_is_any_channel - check whether any channel exists
+  * @vsi: ptr to VSI with which channels are associated
+  *
+  * Returns true if at least one initialized channel exists for the VSI,
+  * false otherwise.
+  **/
+ static bool i40e_is_any_channel(struct i40e_vsi *vsi)
+ {
+       struct i40e_channel *ch, *ch_tmp;
+       list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+               if (ch->initialized)
+                       return true;
+       }
+       return false;
+ }
+ /**
+  * i40e_get_max_queues_for_channel - get max queue count among channels
+  * @vsi: ptr to VSI to which channels are associated with
+  *
+  * Helper function which returns max value among the queue counts set on the
+  * channels/TCs created.
+  **/
+ static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
+ {
+       struct i40e_channel *ch, *ch_tmp;
+       int max = 0;
+       list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+               if (!ch->initialized)
+                       continue;
+               if (ch->num_queue_pairs > max)
+                       max = ch->num_queue_pairs;
+       }
+       return max;
+ }
+ /**
+  * i40e_validate_num_queues - validate num_queues w.r.t channel
+  * @pf: ptr to PF device
+  * @num_queues: number of queues
+  * @vsi: the parent VSI
+  * @reconfig_rss: set to true if RSS needs to be reconfigured for the parent VSI
+  *
+  * This function validates number of queues in the context of new channel
+  * which is being established and determines if RSS should be reconfigured
+  * or not for parent VSI.
+  **/
+ static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
+                                   struct i40e_vsi *vsi, bool *reconfig_rss)
+ {
+       int max_ch_queues;
+       if (!reconfig_rss)
+               return -EINVAL;
+       *reconfig_rss = false;
+       if (num_queues > I40E_MAX_QUEUES_PER_CH) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to create VMDq VSI. User requested num_queues (%d) > I40E_MAX_QUEUES_PER_CH (%u)\n",
+                       num_queues, I40E_MAX_QUEUES_PER_CH);
+               return -EINVAL;
+       }
+       if (vsi->current_rss_size) {
+               if (num_queues > vsi->current_rss_size) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Error: num_queues (%d) > vsi's current_size(%d)\n",
+                               num_queues, vsi->current_rss_size);
+                       return -EINVAL;
+               } else if ((num_queues < vsi->current_rss_size) &&
+                          (!is_power_of_2(num_queues))) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
+                               num_queues, vsi->current_rss_size);
+                       return -EINVAL;
+               }
+       }
+       if (!is_power_of_2(num_queues)) {
+               /* If channels already exist, find the largest num_queues
+                * configured on any of them and require the new 'num_queues'
+                * to be at least that large.
+                */
+               max_ch_queues = i40e_get_max_queues_for_channel(vsi);
+               if (num_queues < max_ch_queues) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Error: num_queues (%d) < max queues configured for channel(%d)\n",
+                               num_queues, max_ch_queues);
+                       return -EINVAL;
+               }
+               *reconfig_rss = true;
+       }
+       return 0;
+ }
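As a worked example with hypothetical numbers: before any channel RSS reprogramming (current_rss_size still 0), a request for num_queues = 5 (not a power of two) is accepted only if 5 is at least the largest queue count of any existing channel, and *reconfig_rss is then set so the parent VSI's RSS is reprogrammed. Once current_rss_size is recorded as, say, 8, a later non-power-of-two request of 5 (< 8) is rejected outright.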
+ /**
+  * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size
+  * @vsi: the VSI being setup
+  * @rss_size: size of RSS, accordingly LUT gets reprogrammed
+  *
+  * This function reconfigures RSS by reprogramming LUTs using 'rss_size'
+  **/
+ static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
+ {
+       struct i40e_pf *pf = vsi->back;
+       u8 seed[I40E_HKEY_ARRAY_SIZE];
+       struct i40e_hw *hw = &pf->hw;
+       int local_rss_size;
+       u8 *lut;
+       int ret;
+       if (!vsi->rss_size)
+               return -EINVAL;
+       if (rss_size > vsi->rss_size)
+               return -EINVAL;
+       local_rss_size = min_t(int, vsi->rss_size, rss_size);
+       lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+       if (!lut)
+               return -ENOMEM;
+       /* Ignoring user configured lut if there is one */
+       i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size);
+       /* Use user configured hash key if there is one, otherwise
+        * use default.
+        */
+       if (vsi->rss_hkey_user)
+               memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+       else
+               netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+       ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "Cannot set RSS lut, err %s aq_err %s\n",
+                        i40e_stat_str(hw, ret),
+                        i40e_aq_str(hw, hw->aq.asq_last_status));
+               kfree(lut);
+               return ret;
+       }
+       kfree(lut);
+       /* Do the update w.r.t. storing rss_size */
+       if (!vsi->orig_rss_size)
+               vsi->orig_rss_size = vsi->rss_size;
+       vsi->current_rss_size = local_rss_size;
+       return ret;
+ }
+ /**
+  * i40e_channel_setup_queue_map - Setup a channel queue map
+  * @pf: ptr to PF device
+  * @vsi: the VSI being setup
+  * @ctxt: VSI context structure
+  * @ch: ptr to channel structure
+  *
+  * Setup queue map for a specific channel
+  **/
+ static void i40e_channel_setup_queue_map(struct i40e_pf *pf,
+                                        struct i40e_vsi_context *ctxt,
+                                        struct i40e_channel *ch)
+ {
+       u16 qcount, qmap, sections = 0;
+       u8 offset = 0;
+       int pow;
+       sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+       sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+       qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix);
+       ch->num_queue_pairs = qcount;
+       /* find the next higher power-of-2 of num queue pairs */
+       pow = ilog2(qcount);
+       if (!is_power_of_2(qcount))
+               pow++;
+       qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+               (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+       /* Setup queue TC[0].qmap for given VSI context */
+       ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+       ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */
+       ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+       ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue);
+       ctxt->info.valid_sections |= cpu_to_le16(sections);
+ }
+ /**
+  * i40e_add_channel - add a channel by adding VSI
+  * @pf: ptr to PF device
+  * @uplink_seid: underlying HW switching element (VEB) ID
+  * @ch: ptr to channel structure
+  *
+  * Add a channel (VSI) using add_vsi and queue_map
+  **/
+ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
+                           struct i40e_channel *ch)
+ {
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_vsi_context ctxt;
+       u8 enabled_tc = 0x1; /* TC0 enabled */
+       int ret;
+       if (ch->type != I40E_VSI_VMDQ2) {
+               dev_info(&pf->pdev->dev,
+                        "add new vsi failed, ch->type %d\n", ch->type);
+               return -EINVAL;
+       }
+       memset(&ctxt, 0, sizeof(ctxt));
+       ctxt.pf_num = hw->pf_id;
+       ctxt.vf_num = 0;
+       ctxt.uplink_seid = uplink_seid;
+       ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+       if (ch->type == I40E_VSI_VMDQ2)
+               ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+       if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
+               ctxt.info.valid_sections |=
+                    cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+               ctxt.info.switch_id =
+                  cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+       }
+       /* Set queue map for a given VSI context */
+       i40e_channel_setup_queue_map(pf, &ctxt, ch);
+       /* Now time to create VSI */
+       ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "add new vsi failed, err %s aq_err %s\n",
+                        i40e_stat_str(&pf->hw, ret),
+                        i40e_aq_str(&pf->hw,
+                                    pf->hw.aq.asq_last_status));
+               return -ENOENT;
+       }
+       /* Success, update channel */
+       ch->enabled_tc = enabled_tc;
+       ch->seid = ctxt.seid;
+       ch->vsi_number = ctxt.vsi_number;
+       ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx);
+       /* copy just the sections touched, not the entire info,
+        * since not all sections are valid in the context returned
+        * by the add VSI command
+        */
+       ch->info.mapping_flags = ctxt.info.mapping_flags;
+       memcpy(&ch->info.queue_mapping,
+              &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
+       memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping,
+              sizeof(ctxt.info.tc_mapping));
+       return 0;
+ }
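+ /**
+  * i40e_channel_config_bw - configure BW for the channel VSI
+  * @vsi: the parent VSI
+  * @ch: ptr to channel structure
+  * @bw_share: per-TC BW share credits
+  *
+  * Configure per-TC BW shares for the channel VSI and cache the queue set
+  * handles returned by the firmware.
+  **/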
+ static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
+                                 u8 *bw_share)
+ {
+       struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+       i40e_status ret;
+       int i;
+       bw_data.tc_valid_bits = ch->enabled_tc;
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+               bw_data.tc_bw_credits[i] = bw_share[i];
+       ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
+                                      &bw_data, NULL);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
+                        vsi->back->hw.aq.asq_last_status, ch->seid);
+               return -EINVAL;
+       }
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+               ch->info.qs_handle[i] = bw_data.qs_handles[i];
+       return 0;
+ }
+ /**
+  * i40e_channel_config_tx_ring - config TX ring associated with new channel
+  * @pf: ptr to PF device
+  * @vsi: the VSI being setup
+  * @ch: ptr to channel structure
+  *
+  * Configure the TX rings associated with the channel (VSI), since its
+  * queues are borrowed from the parent VSI.
+  **/
+ static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
+                                      struct i40e_vsi *vsi,
+                                      struct i40e_channel *ch)
+ {
+       i40e_status ret;
+       int i;
+       u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+       /* Enable ETS TCs with equal BW Share for now across all VSIs */
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               if (ch->enabled_tc & BIT(i))
+                       bw_share[i] = 1;
+       }
+       /* configure BW for new VSI */
+       ret = i40e_channel_config_bw(vsi, ch, bw_share);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed configuring TC map %d for channel (seid %u)\n",
+                        ch->enabled_tc, ch->seid);
+               return ret;
+       }
+       for (i = 0; i < ch->num_queue_pairs; i++) {
+               struct i40e_ring *tx_ring, *rx_ring;
+               u16 pf_q;
+               pf_q = ch->base_queue + i;
+               /* Get to TX ring ptr of main VSI, for re-setup TX queue
+                * context
+                */
+               tx_ring = vsi->tx_rings[pf_q];
+               tx_ring->ch = ch;
+               /* Get the RX ring ptr */
+               rx_ring = vsi->rx_rings[pf_q];
+               rx_ring->ch = ch;
+       }
+       return 0;
+ }
+ /**
+  * i40e_setup_hw_channel - setup new channel
+  * @pf: ptr to PF device
+  * @vsi: the VSI being setup
+  * @ch: ptr to channel structure
+  * @uplink_seid: underlying HW switching element (VEB) ID
+  * @type: type of channel to be created (VMDq2/VF)
+  *
+  * Setup new channel (VSI) based on specified type (VMDq2/VF)
+  * and configures TX rings accordingly
+  **/
+ static inline int i40e_setup_hw_channel(struct i40e_pf *pf,
+                                       struct i40e_vsi *vsi,
+                                       struct i40e_channel *ch,
+                                       u16 uplink_seid, u8 type)
+ {
+       int ret;
+       ch->initialized = false;
+       ch->base_queue = vsi->next_base_queue;
+       ch->type = type;
+       /* Proceed with creation of channel (VMDq2) VSI */
+       ret = i40e_add_channel(pf, uplink_seid, ch);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "failed to add_channel using uplink_seid %u\n",
+                        uplink_seid);
+               return ret;
+       }
+       /* Mark the successful creation of channel */
+       ch->initialized = true;
+       /* Reconfigure TX queues using QTX_CTL register */
+       ret = i40e_channel_config_tx_ring(pf, vsi, ch);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "failed to configure TX rings for channel %u\n",
+                        ch->seid);
+               return ret;
+       }
+       /* update 'next_base_queue' */
+       vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs;
+       dev_dbg(&pf->pdev->dev,
+               "Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n",
+               ch->seid, ch->vsi_number, ch->stat_counter_idx,
+               ch->num_queue_pairs,
+               vsi->next_base_queue);
+       return ret;
+ }
+ /**
+  * i40e_setup_channel - setup new channel using uplink element
+  * @pf: ptr to PF device
+  * @vsi: ptr to the parent VSI
+  * @ch: ptr to channel structure
+  *
+  * Setup a new channel (VSI) for the given parent VSI (currently only the
+  * main VSI is supported, which creates a VMDq2 channel) using the parent's
+  * uplink switching element, and configure its TX rings.
+  **/
+ static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi,
+                              struct i40e_channel *ch)
+ {
+       u8 vsi_type;
+       u16 seid;
+       int ret;
+       if (vsi->type == I40E_VSI_MAIN) {
+               vsi_type = I40E_VSI_VMDQ2;
+       } else {
+               dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
+                       vsi->type);
+               return false;
+       }
+       /* underlying switching element */
+       seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+       /* create channel (VSI), configure TX rings */
+       ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type);
+       if (ret) {
+               dev_err(&pf->pdev->dev, "failed to setup hw_channel\n");
+               return false;
+       }
+       return ch->initialized ? true : false;
+ }
+ /**
+  * i40e_validate_and_set_switch_mode - sets up switch mode correctly
+  * @vsi: ptr to VSI which has PF backing
+  *
+  * Sets up the switch mode for cloud filter support if it needs to be
+  * changed, allowing only the switch modes that support it.
+  **/
+ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
+ {
+       u8 mode;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       int ret;
+       ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities);
+       if (ret)
+               return -EINVAL;
+       if (hw->dev_caps.switch_mode) {
+               /* if switch mode is set, support mode2 (non-tunneled for
+                * cloud filter) for now
+                */
+               u32 switch_mode = hw->dev_caps.switch_mode &
+                                 I40E_SWITCH_MODE_MASK;
+               if (switch_mode >= I40E_CLOUD_FILTER_MODE1) {
+                       if (switch_mode == I40E_CLOUD_FILTER_MODE2)
+                               return 0;
+                       dev_err(&pf->pdev->dev,
+                               "Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n",
+                               hw->dev_caps.switch_mode);
+                       return -EINVAL;
+               }
+       }
+       /* Set Bit 7 to be valid */
+       mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
+       /* Set L4type to both TCP and UDP support */
+       mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH;
+       /* Set cloud filter mode */
+       mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
+       /* Prep mode field for set_switch_config */
+       ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags,
+                                       pf->last_sw_conf_valid_flags,
+                                       mode, NULL);
+       if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH)
+               dev_err(&pf->pdev->dev,
+                       "couldn't set switch config bits, err %s aq_err %s\n",
+                       i40e_stat_str(hw, ret),
+                       i40e_aq_str(hw,
+                                   hw->aq.asq_last_status));
+       return ret;
+ }
+ /**
+  * i40e_create_queue_channel - function to create channel
+  * @vsi: VSI to be configured
+  * @ch: ptr to channel (it contains channel specific params)
+  *
+  * This function creates channel (VSI) using num_queues specified by user,
+  * reconfigs RSS if needed.
+  **/
+ int i40e_create_queue_channel(struct i40e_vsi *vsi,
+                             struct i40e_channel *ch)
+ {
+       struct i40e_pf *pf = vsi->back;
+       bool reconfig_rss;
+       int err;
+       if (!ch)
+               return -EINVAL;
+       if (!ch->num_queue_pairs) {
+               dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
+                       ch->num_queue_pairs);
+               return -EINVAL;
+       }
+       /* validate user requested num_queues for channel */
+       err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
+                                      &reconfig_rss);
+       if (err) {
+               dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
+                        ch->num_queue_pairs);
+               return -EINVAL;
+       }
+       /* By default we are in VEPA mode, if this is the first VF/VMDq
+        * VSI to be added switch to VEB mode.
+        */
+       if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) ||
+           (!i40e_is_any_channel(vsi))) {
+               if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Failed to create channel. Override queues (%u) not power of 2\n",
+                               vsi->tc_config.tc_info[0].qcount);
+                       return -EINVAL;
+               }
+               if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+                       pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+                       if (vsi->type == I40E_VSI_MAIN) {
+                               if (pf->flags & I40E_FLAG_TC_MQPRIO)
+                                       i40e_do_reset(pf, I40E_PF_RESET_FLAG,
+                                                     true);
+                               else
+                                       i40e_do_reset_safe(pf,
+                                                          I40E_PF_RESET_FLAG);
+                       }
+               }
+               /* From now on, for the main VSI the number of queues is
+                * the value of TC0's queue count.
+                */
+       }
+       /* By this time, vsi->cnt_q_avail shall be set to non-zero and
+        * it should be more than num_queues
+        */
+       if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
+               dev_dbg(&pf->pdev->dev,
+                       "Error: cnt_q_avail (%u) less than num_queues %d\n",
+                       vsi->cnt_q_avail, ch->num_queue_pairs);
+               return -EINVAL;
+       }
+       /* reconfig_rss only if vsi type is MAIN_VSI */
+       if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
+               err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
+               if (err) {
+                       dev_info(&pf->pdev->dev,
+                                "Error: unable to reconfig rss for num_queues (%u)\n",
+                                ch->num_queue_pairs);
+                       return -EINVAL;
+               }
+       }
+       if (!i40e_setup_channel(pf, vsi, ch)) {
+               dev_info(&pf->pdev->dev, "Failed to setup channel\n");
+               return -EINVAL;
+       }
+       dev_info(&pf->pdev->dev,
+                "Setup channel (id:%u) utilizing num_queues %d\n",
+                ch->seid, ch->num_queue_pairs);
+       /* configure VSI for BW limit */
+       if (ch->max_tx_rate) {
+               u64 credits = ch->max_tx_rate;
+               if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
+                       return -EINVAL;
+               do_div(credits, I40E_BW_CREDIT_DIVISOR);
+               dev_dbg(&pf->pdev->dev,
+                       "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+                       ch->max_tx_rate,
+                       credits,
+                       ch->seid);
+       }
+       /* in case of VF, this will be main SRIOV VSI */
+       ch->parent_vsi = vsi;
+       /* and update main_vsi's count for queue_available to use */
+       vsi->cnt_q_avail -= ch->num_queue_pairs;
+       return 0;
+ }
+ /**
+  * i40e_configure_queue_channels - Add queue channel for the given TCs
+  * @vsi: VSI to be configured
+  *
+  * Configures queue channel mapping to the given TCs
+  **/
+ static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
+ {
+       struct i40e_channel *ch;
+       u64 max_rate = 0;
+       int ret = 0, i;
+       /* Create app vsi with the TCs. Main VSI with TC0 is already set up */
+       vsi->tc_seid_map[0] = vsi->seid;
+       for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               if (vsi->tc_config.enabled_tc & BIT(i)) {
+                       ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+                       if (!ch) {
+                               ret = -ENOMEM;
+                               goto err_free;
+                       }
+                       INIT_LIST_HEAD(&ch->list);
+                       ch->num_queue_pairs =
+                               vsi->tc_config.tc_info[i].qcount;
+                       ch->base_queue =
+                               vsi->tc_config.tc_info[i].qoffset;
+                       /* Bandwidth limit through tc interface is in bytes/s,
+                        * change to Mbit/s
+                        */
+                       max_rate = vsi->mqprio_qopt.max_rate[i];
+                       do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+                       ch->max_tx_rate = max_rate;
+                       list_add_tail(&ch->list, &vsi->ch_list);
+                       ret = i40e_create_queue_channel(vsi, ch);
+                       if (ret) {
+                               dev_err(&vsi->back->pdev->dev,
+                                       "Failed creating queue channel with TC%d: queues %d\n",
+                                       i, ch->num_queue_pairs);
+                               goto err_free;
+                       }
+                       vsi->tc_seid_map[i] = ch->seid;
+               }
+       }
+       return ret;
+ err_free:
+       i40e_remove_queue_channels(vsi);
+       return ret;
+ }
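The tc layer hands max_rate to the driver in bytes per second, while the hardware limit is programmed in Mbit/s, hence the do_div above. Assuming I40E_BW_MBPS_DIVISOR is the number of bytes/s in one Mbit/s (125000), a hypothetical worked example:

	/* Hypothetical example, assuming I40E_BW_MBPS_DIVISOR == 125000 */
	u64 max_rate = 62500000;                 /* 62,500,000 bytes/s from tc */

	do_div(max_rate, I40E_BW_MBPS_DIVISOR);  /* -> 500 Mbit/s              */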
+ /**
+  * i40e_veb_config_tc - Configure TCs for given VEB
+  * @veb: given VEB
+  * @enabled_tc: TC bitmap
+  *
+  * Configures given TC bitmap for VEB (switching) element
+  **/
+ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+ {
+       struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
+       struct i40e_pf *pf = veb->pf;
+       int ret = 0;
+       int i;
+       /* No TCs or already enabled TCs just return */
+       if (!enabled_tc || veb->enabled_tc == enabled_tc)
+               return ret;
+       bw_data.tc_valid_bits = enabled_tc;
+       /* bw_data.absolute_credits is not set (relative) */
+       /* Enable ETS TCs with equal BW Share for now */
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               if (enabled_tc & BIT(i))
+                       bw_data.tc_bw_share_credits[i] = 1;
+       }
+       ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
+                                                  &bw_data, NULL);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "VEB bw config failed, err %s aq_err %s\n",
+                        i40e_stat_str(&pf->hw, ret),
+                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               goto out;
+       }
+       /* Update the BW information */
+       ret = i40e_veb_get_bw_info(veb);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "Failed getting veb bw config, err %s aq_err %s\n",
+                        i40e_stat_str(&pf->hw, ret),
+                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+       }
+ out:
+       return ret;
+ }
+ #ifdef CONFIG_I40E_DCB
+ /**
+  * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
+  * @pf: PF struct
+  *
+  * Reconfigure VEB/VSIs on a given PF; it is assumed that
+  * the caller would've quiesce all the VSIs before calling
+  * this function
+  **/
+ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
+ {
+       u8 tc_map = 0;
+       int ret;
+       u8 v;
+       /* Enable the TCs available on PF to all VEBs */
+       tc_map = i40e_pf_get_tc_map(pf);
+       for (v = 0; v < I40E_MAX_VEB; v++) {
+               if (!pf->veb[v])
+                       continue;
+               ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Failed configuring TC for VEB seid=%d\n",
+                                pf->veb[v]->seid);
+                       /* Will try to configure as many components as possible */
+               }
+       }
+       /* Update each VSI */
+       for (v = 0; v < pf->num_alloc_vsi; v++) {
+               if (!pf->vsi[v])
+                       continue;
+               /* - Enable all TCs for the LAN VSI
+                * - For all others keep them at TC0 for now
+                */
+               if (v == pf->lan_vsi)
+                       tc_map = i40e_pf_get_tc_map(pf);
+               else
+                       tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
+               ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Failed configuring TC for VSI seid=%d\n",
+                                pf->vsi[v]->seid);
+                       /* Will try to configure as many components as possible */
+               } else {
+                       /* Re-configure VSI vectors based on updated TC map */
+                       i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
+                       if (pf->vsi[v]->netdev)
+                               i40e_dcbnl_set_all(pf->vsi[v]);
+               }
+       }
+ }
+ /**
+  * i40e_resume_port_tx - Resume port Tx
+  * @pf: PF struct
+  *
+  * Resume a port's Tx and issue a PF reset in case of failure to
+  * resume.
+  **/
+ static int i40e_resume_port_tx(struct i40e_pf *pf)
+ {
+       struct i40e_hw *hw = &pf->hw;
+       int ret;
+       ret = i40e_aq_resume_port_tx(hw, NULL);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "Resume Port Tx failed, err %s aq_err %s\n",
+                         i40e_stat_str(&pf->hw, ret),
+                         i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               /* Schedule PF reset to recover */
+               set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+               i40e_service_event_schedule(pf);
+       }
+       return ret;
+ }
+ /**
+  * i40e_init_pf_dcb - Initialize DCB configuration
+  * @pf: PF being configured
+  *
+  * Query the current DCB configuration and cache it
+  * in the hardware structure
+  **/
+ static int i40e_init_pf_dcb(struct i40e_pf *pf)
+ {
+       struct i40e_hw *hw = &pf->hw;
+       int err = 0;
+       /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
+       if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT)
+               goto out;
+       /* Get the initial DCB configuration */
+       err = i40e_init_dcb(hw);
+       if (!err) {
+               /* Device/Function is not DCBX capable */
+               if ((!hw->func_caps.dcb) ||
+                   (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
+                       dev_info(&pf->pdev->dev,
+                                "DCBX offload is not supported or is disabled for this PF.\n");
+               } else {
+                       /* When status is not DISABLED then DCBX is in FW */
+                       pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
+                                      DCB_CAP_DCBX_VER_IEEE;
+                       pf->flags |= I40E_FLAG_DCB_CAPABLE;
+                       /* Enable DCB tagging only when more than one TC
+                        * or explicitly disable if only one TC
+                        */
+                       if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
+                               pf->flags |= I40E_FLAG_DCB_ENABLED;
+                       else
+                               pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+                       dev_dbg(&pf->pdev->dev,
+                               "DCBX offload is supported for this PF.\n");
+               }
+       } else {
+               dev_info(&pf->pdev->dev,
+                        "Query for DCB configuration failed, err %s aq_err %s\n",
+                        i40e_stat_str(&pf->hw, err),
+                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+       }
+ out:
+       return err;
+ }
+ #endif /* CONFIG_I40E_DCB */
+ #define SPEED_SIZE 14
+ #define FC_SIZE 8
+ /**
+  * i40e_print_link_message - print link up or down
+  * @vsi: the VSI for which link needs a message
+  * @isup: true if the link is up, false if it is down
+  **/
+ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+ {
+       enum i40e_aq_link_speed new_speed;
+       struct i40e_pf *pf = vsi->back;
+       char *speed = "Unknown";
+       char *fc = "Unknown";
+       char *fec = "";
+       char *req_fec = "";
+       char *an = "";
+       new_speed = pf->hw.phy.link_info.link_speed;
+       if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
+               return;
+       vsi->current_isup = isup;
+       vsi->current_speed = new_speed;
+       if (!isup) {
+               netdev_info(vsi->netdev, "NIC Link is Down\n");
+               return;
+       }
+       /* Warn user if link speed on NPAR enabled partition is not at
+        * least 10GB
+        */
+       if (pf->hw.func_caps.npar_enable &&
+           (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
+            pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
+               netdev_warn(vsi->netdev,
+                           "The partition detected link speed that is less than 10Gbps\n");
+       switch (pf->hw.phy.link_info.link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               speed = "40 G";
+               break;
+       case I40E_LINK_SPEED_20GB:
+               speed = "20 G";
+               break;
+       case I40E_LINK_SPEED_25GB:
+               speed = "25 G";
+               break;
+       case I40E_LINK_SPEED_10GB:
+               speed = "10 G";
+               break;
+       case I40E_LINK_SPEED_1GB:
+               speed = "1000 M";
+               break;
+       case I40E_LINK_SPEED_100MB:
+               speed = "100 M";
+               break;
+       default:
+               break;
+       }
+       switch (pf->hw.fc.current_mode) {
+       case I40E_FC_FULL:
+               fc = "RX/TX";
+               break;
+       case I40E_FC_TX_PAUSE:
+               fc = "TX";
+               break;
+       case I40E_FC_RX_PAUSE:
+               fc = "RX";
+               break;
+       default:
+               fc = "None";
+               break;
+       }
+       if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
+               req_fec = ", Requested FEC: None";
+               fec = ", FEC: None";
+               an = ", Autoneg: False";
+               if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
+                       an = ", Autoneg: True";
+               if (pf->hw.phy.link_info.fec_info &
+                   I40E_AQ_CONFIG_FEC_KR_ENA)
+                       fec = ", FEC: CL74 FC-FEC/BASE-R";
+               else if (pf->hw.phy.link_info.fec_info &
+                        I40E_AQ_CONFIG_FEC_RS_ENA)
+                       fec = ", FEC: CL108 RS-FEC";
+               /* 'CL108 RS-FEC' should be displayed when RS is requested, or
+                * both RS and FC are requested
+                */
+               if (vsi->back->hw.phy.link_info.req_fec_info &
+                   (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
+                       if (vsi->back->hw.phy.link_info.req_fec_info &
+                           I40E_AQ_REQUEST_FEC_RS)
+                               req_fec = ", Requested FEC: CL108 RS-FEC";
+                       else
+                               req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+               }
+       }
+       netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
+                   speed, req_fec, fec, an, fc);
+ }
+ /**
+  * i40e_up_complete - Finish the last steps of bringing up a connection
+  * @vsi: the VSI being configured
+  **/
+ static int i40e_up_complete(struct i40e_vsi *vsi)
+ {
+       struct i40e_pf *pf = vsi->back;
+       int err;
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+               i40e_vsi_configure_msix(vsi);
+       else
+               i40e_configure_msi_and_legacy(vsi);
+       /* start rings */
+       err = i40e_vsi_start_rings(vsi);
+       if (err)
+               return err;
+       clear_bit(__I40E_VSI_DOWN, vsi->state);
+       i40e_napi_enable_all(vsi);
+       i40e_vsi_enable_irq(vsi);
+       if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
+           (vsi->netdev)) {
+               i40e_print_link_message(vsi, true);
+               netif_tx_start_all_queues(vsi->netdev);
+               netif_carrier_on(vsi->netdev);
+       }
+       /* replay FDIR SB filters */
+       if (vsi->type == I40E_VSI_FDIR) {
+               /* reset fd counters */
+               pf->fd_add_err = 0;
+               pf->fd_atr_cnt = 0;
+               i40e_fdir_filter_restore(vsi);
+       }
+       /* On the next run of the service_task, notify any clients of the new
+        * opened netdev
+        */
+       pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+       i40e_service_event_schedule(pf);
+       return 0;
+ }
+ /**
+  * i40e_vsi_reinit_locked - Reset the VSI
+  * @vsi: the VSI being configured
+  *
+  * Rebuild the ring structs after some configuration
+  * has changed, e.g. MTU size.
+  **/
+ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+ {
+       struct i40e_pf *pf = vsi->back;
+       WARN_ON(in_interrupt());
+       while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
+               usleep_range(1000, 2000);
+       i40e_down(vsi);
+       i40e_up(vsi);
+       clear_bit(__I40E_CONFIG_BUSY, pf->state);
+ }
+ /**
+  * i40e_up - Bring the connection back up after being down
+  * @vsi: the VSI being configured
+  **/
+ int i40e_up(struct i40e_vsi *vsi)
+ {
+       int err;
+       err = i40e_vsi_configure(vsi);
+       if (!err)
+               err = i40e_up_complete(vsi);
+       return err;
+ }
+ /**
+  * i40e_down - Shutdown the connection processing
+  * @vsi: the VSI being stopped
+  **/
+ void i40e_down(struct i40e_vsi *vsi)
+ {
+       int i;
+       /* It is assumed that the caller of this function
+        * sets the vsi->state __I40E_VSI_DOWN bit.
+        */
+       if (vsi->netdev) {
+               netif_carrier_off(vsi->netdev);
+               netif_tx_disable(vsi->netdev);
+       }
+       i40e_vsi_disable_irq(vsi);
+       i40e_vsi_stop_rings(vsi);
+       i40e_napi_disable_all(vsi);
+       for (i = 0; i < vsi->num_queue_pairs; i++) {
+               i40e_clean_tx_ring(vsi->tx_rings[i]);
+               if (i40e_enabled_xdp_vsi(vsi))
+                       i40e_clean_tx_ring(vsi->xdp_rings[i]);
+               i40e_clean_rx_ring(vsi->rx_rings[i]);
+       }
+ }
+ /**
+  * i40e_validate_mqprio_qopt - validate queue mapping info
+  * @vsi: the VSI being configured
+  * @mqprio_qopt: queue parameters
+  **/
+ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
+                                    struct tc_mqprio_qopt_offload *mqprio_qopt)
+ {
+       u64 sum_max_rate = 0;
+       u64 max_rate = 0;
+       int i;
+       if (mqprio_qopt->qopt.offset[0] != 0 ||
+           mqprio_qopt->qopt.num_tc < 1 ||
+           mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS)
+               return -EINVAL;
+       for (i = 0; ; i++) {
+               if (!mqprio_qopt->qopt.count[i])
+                       return -EINVAL;
+               if (mqprio_qopt->min_rate[i]) {
+                       dev_err(&vsi->back->pdev->dev,
+                               "Invalid min tx rate (greater than 0) specified\n");
+                       return -EINVAL;
+               }
+               max_rate = mqprio_qopt->max_rate[i];
+               do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+               sum_max_rate += max_rate;
+               if (i >= mqprio_qopt->qopt.num_tc - 1)
+                       break;
+               if (mqprio_qopt->qopt.offset[i + 1] !=
+                   (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+                       return -EINVAL;
+       }
+       if (vsi->num_queue_pairs <
+           (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+               return -EINVAL;
+       }
+       if (sum_max_rate > i40e_get_link_speed(vsi)) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Invalid max tx rate specified\n");
+               return -EINVAL;
+       }
+       return 0;
+ }
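
The validation above reduces to a few rules: TC 0 must start at queue offset 0, every requested TC needs a non-zero queue count, consecutive TC queue ranges must be back-to-back (offset[i + 1] == offset[i] + count[i]), per-TC minimum rates are rejected, and the summed maximum rates (converted to Mbps) must not exceed the link speed. A minimal user-space sketch of the offset/count walk, using hypothetical example values rather than anything taken from the driver:

#include <stdio.h>

#define MAX_TC 8

/* Return 0 when every TC has a non-zero queue count and the TC queue
 * ranges are contiguous, mirroring the offset/count checks above.
 */
static int validate_tc_layout(const unsigned int *offset,
                              const unsigned int *count, int num_tc)
{
        int i;

        if (num_tc < 1 || num_tc > MAX_TC || offset[0] != 0)
                return -1;
        for (i = 0; i < num_tc; i++) {
                if (!count[i])
                        return -1;
                if (i && offset[i] != offset[i - 1] + count[i - 1])
                        return -1;
        }
        return 0;
}

int main(void)
{
        /* hypothetical layout: TC0 owns queues 0-3, TC1 owns queues 4-5 */
        unsigned int offset[MAX_TC] = { 0, 4 };
        unsigned int count[MAX_TC]  = { 4, 2 };

        printf("layout %s\n",
               validate_tc_layout(offset, count, 2) ? "rejected" : "accepted");
        return 0;
}

The rate checks are left out on purpose; they depend on i40e_get_link_speed() and on constants defined elsewhere in the driver.
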
+ /**
+  * i40e_vsi_set_default_tc_config - set default values for tc configuration
+  * @vsi: the VSI being configured
+  **/
+ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
+ {
+       u16 qcount;
+       int i;
+       /* Only TC0 is enabled */
+       vsi->tc_config.numtc = 1;
+       vsi->tc_config.enabled_tc = 1;
+       qcount = min_t(int, vsi->alloc_queue_pairs,
+                      i40e_pf_get_max_q_per_tc(vsi->back));
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               /* For the TC that is not enabled set the offset to the default
+                * queue and allocate one queue for the given TC.
+                */
+               vsi->tc_config.tc_info[i].qoffset = 0;
+               if (i == 0)
+                       vsi->tc_config.tc_info[i].qcount = qcount;
+               else
+                       vsi->tc_config.tc_info[i].qcount = 1;
+               vsi->tc_config.tc_info[i].netdev_tc = 0;
+       }
+ }
+ /**
+  * i40e_setup_tc - configure multiple traffic classes
+  * @netdev: net device to configure
+  * @type_data: tc offload data
+  **/
+ static int i40e_setup_tc(struct net_device *netdev, void *type_data)
+ {
+       struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       u8 enabled_tc = 0, num_tc, hw;
+       bool need_reset = false;
+       int ret = -EINVAL;
+       u16 mode;
+       int i;
+       num_tc = mqprio_qopt->qopt.num_tc;
+       hw = mqprio_qopt->qopt.hw;
+       mode = mqprio_qopt->mode;
+       if (!hw) {
+               pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+               memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+               goto config_tc;
+       }
+       /* Check if MFP enabled */
+       if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+               netdev_info(netdev,
+                           "Configuring TC not supported in MFP mode\n");
+               return ret;
+       }
+       switch (mode) {
+       case TC_MQPRIO_MODE_DCB:
+               pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+               /* Check if DCB enabled to continue */
+               if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+                       netdev_info(netdev,
+                                   "DCB is not enabled for adapter\n");
+                       return ret;
+               }
+               /* Check whether tc count is within enabled limit */
+               if (num_tc > i40e_pf_get_num_tc(pf)) {
+                       netdev_info(netdev,
+                                   "TC count greater than enabled on link for adapter\n");
+                       return ret;
+               }
+               break;
+       case TC_MQPRIO_MODE_CHANNEL:
+               if (pf->flags & I40E_FLAG_DCB_ENABLED) {
+                       netdev_info(netdev,
+                                   "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
+                       return ret;
+               }
+               if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+                       return ret;
+               ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
+               if (ret)
+                       return ret;
+               memcpy(&vsi->mqprio_qopt, mqprio_qopt,
+                      sizeof(*mqprio_qopt));
+               pf->flags |= I40E_FLAG_TC_MQPRIO;
+               pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+               break;
+       default:
+               return -EINVAL;
+       }
+ config_tc:
+       /* Generate TC map for number of tc requested */
+       for (i = 0; i < num_tc; i++)
+               enabled_tc |= BIT(i);
+       /* Requesting same TC configuration as already enabled */
+       if (enabled_tc == vsi->tc_config.enabled_tc &&
+           mode != TC_MQPRIO_MODE_CHANNEL)
+               return 0;
+       /* Quiesce VSI queues */
+       i40e_quiesce_vsi(vsi);
+       if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO))
+               i40e_remove_queue_channels(vsi);
+       /* Configure VSI for enabled TCs */
+       ret = i40e_vsi_config_tc(vsi, enabled_tc);
+       if (ret) {
+               netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
+                           vsi->seid);
+               need_reset = true;
+               goto exit;
+       }
+       if (pf->flags & I40E_FLAG_TC_MQPRIO) {
+               if (vsi->mqprio_qopt.max_rate[0]) {
+                       u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+                       do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+                       ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+                       if (!ret) {
+                               u64 credits = max_tx_rate;
+                               do_div(credits, I40E_BW_CREDIT_DIVISOR);
+                               dev_dbg(&vsi->back->pdev->dev,
+                                       "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+                                       max_tx_rate,
+                                       credits,
+                                       vsi->seid);
+                       } else {
+                               need_reset = true;
+                               goto exit;
+                       }
+               }
+               ret = i40e_configure_queue_channels(vsi);
                if (ret) {
-                       dev_info(&pf->pdev->dev,
-                                "Failed configuring TC for VEB seid=%d\n",
-                                pf->veb[v]->seid);
-                       /* Will try to configure as many components */
+                       netdev_info(netdev,
+                                   "Failed configuring queue channels\n");
+                       need_reset = true;
+                       goto exit;
                }
        }
  
-       /* Update each VSI */
-       for (v = 0; v < pf->num_alloc_vsi; v++) {
-               if (!pf->vsi[v])
-                       continue;
+ exit:
+       /* Reset the configuration data to defaults, only TC0 is enabled */
+       if (need_reset) {
+               i40e_vsi_set_default_tc_config(vsi);
+               need_reset = false;
+       }
  
-               /* - Enable all TCs for the LAN VSI
-                * - For all others keep them at TC0 for now
-                */
-               if (v == pf->lan_vsi)
-                       tc_map = i40e_pf_get_tc_map(pf);
-               else
-                       tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
+       /* Unquiesce VSI */
+       i40e_unquiesce_vsi(vsi);
+       return ret;
+ }
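
The mqprio rate handling above divides twice: once to turn the requested max_rate into Mbps, and once more to express it in the scheduler's 50 Mbps credit units (the divisor of 50 is visible in the debug message). A hedged user-space sketch of that arithmetic, assuming the rate arrives in bytes per second and that I40E_BW_MBPS_DIVISOR is 125000 bytes/s per Mbit/s (neither assumption is visible in this hunk), with plain 64-bit division standing in for the kernel's do_div():

#include <stdio.h>
#include <stdint.h>

#define BW_MBPS_DIVISOR   125000ULL  /* assumed: bytes per second per Mbit/s */
#define BW_CREDIT_DIVISOR 50ULL      /* one scheduler credit is 50 Mbps */

int main(void)
{
        uint64_t max_rate_bytes = 625000000ULL;  /* hypothetical: 5 Gbit/s */
        uint64_t mbps, credits;

        mbps = max_rate_bytes / BW_MBPS_DIVISOR;  /* do_div(rate, MBPS divisor) */
        credits = mbps / BW_CREDIT_DIVISOR;       /* do_div(rate, 50) */

        printf("Set tx rate of %llu Mbps (count of 50Mbps %llu)\n",
               (unsigned long long)mbps, (unsigned long long)credits);
        return 0;
}

do_div() is used in the driver because a plain 64-by-32 division is not available on every 32-bit architecture the kernel supports.
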
  
-               ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
-               if (ret) {
-                       dev_info(&pf->pdev->dev,
-                                "Failed configuring TC for VSI seid=%d\n",
-                                pf->vsi[v]->seid);
-                       /* Will try to configure as many components */
-               } else {
-                       /* Re-configure VSI vectors based on updated TC map */
-                       i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
-                       if (pf->vsi[v]->netdev)
-                               i40e_dcbnl_set_all(pf->vsi[v]);
+ /**
+  * i40e_set_cld_element - sets cloud filter element data
+  * @filter: cloud filter rule
+  * @cld: ptr to cloud filter element data
+  *
+  * This is a helper function to copy data into the cloud filter element
+  **/
+ static inline void
+ i40e_set_cld_element(struct i40e_cloud_filter *filter,
+                    struct i40e_aqc_cloud_filters_element_data *cld)
+ {
+       int i, j;
+       u32 ipa;
+       memset(cld, 0, sizeof(*cld));
+       ether_addr_copy(cld->outer_mac, filter->dst_mac);
+       ether_addr_copy(cld->inner_mac, filter->src_mac);
+       if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6)
+               return;
+       if (filter->n_proto == ETH_P_IPV6) {
+ #define IPV6_MAX_INDEX        (ARRAY_SIZE(filter->dst_ipv6) - 1)
+               for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6);
+                    i++, j += 2) {
+                       ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
+                       ipa = cpu_to_le32(ipa);
+                       memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa));
                }
+       } else {
+               ipa = be32_to_cpu(filter->dst_ipv4);
+               memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
        }
+       cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id));
+       /* tenant_id is not supported by FW now; once the support is enabled,
+        * fill cld->tenant_id with cpu_to_le32(filter->tenant_id)
+        */
+       if (filter->tenant_id)
+               return;
  }
  
  /**
-  * i40e_resume_port_tx - Resume port Tx
-  * @pf: PF struct
+  * i40e_add_del_cloud_filter - Add/del cloud filter
+  * @vsi: pointer to VSI
+  * @filter: cloud filter rule
+  * @add: if true, add, if false, delete
   *
-  * Resume a port's Tx and issue a PF reset in case of failure to
-  * resume.
+  * Add or delete a cloud filter for a specific flow spec.
+  * Returns 0 if the filter was successfully added.
   **/
- static int i40e_resume_port_tx(struct i40e_pf *pf)
+ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+                                    struct i40e_cloud_filter *filter, bool add)
  {
-       struct i40e_hw *hw = &pf->hw;
+       struct i40e_aqc_cloud_filters_element_data cld_filter;
+       struct i40e_pf *pf = vsi->back;
        int ret;
+       static const u16 flag_table[128] = {
+               [I40E_CLOUD_FILTER_FLAGS_OMAC]  =
+                       I40E_AQC_ADD_CLOUD_FILTER_OMAC,
+               [I40E_CLOUD_FILTER_FLAGS_IMAC]  =
+                       I40E_AQC_ADD_CLOUD_FILTER_IMAC,
+               [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN]  =
+                       I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
+               [I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
+                       I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
+               [I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
+                       I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
+               [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
+                       I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
+               [I40E_CLOUD_FILTER_FLAGS_IIP] =
+                       I40E_AQC_ADD_CLOUD_FILTER_IIP,
+       };
+       if (filter->flags >= ARRAY_SIZE(flag_table))
+               return I40E_ERR_CONFIG;
+       /* copy element needed to add cloud filter from filter */
+       i40e_set_cld_element(filter, &cld_filter);
+       if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
+               cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
+                                            I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
+       if (filter->n_proto == ETH_P_IPV6)
+               cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+                                               I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+       else
+               cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+                                               I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
  
-       ret = i40e_aq_resume_port_tx(hw, NULL);
-       if (ret) {
+       if (add)
+               ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
+                                               &cld_filter, 1);
+       else
+               ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
+                                               &cld_filter, 1);
+       if (ret)
+               dev_dbg(&pf->pdev->dev,
+                       "Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
+                       add ? "add" : "delete", filter->dst_port, ret,
+                       pf->hw.aq.asq_last_status);
+       else
                dev_info(&pf->pdev->dev,
-                        "Resume Port Tx failed, err %s aq_err %s\n",
-                         i40e_stat_str(&pf->hw, ret),
-                         i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-               /* Schedule PF reset to recover */
-               set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
-               i40e_service_event_schedule(pf);
-       }
+                        "%s cloud filter for VSI: %d\n",
+                        add ? "Added" : "Deleted", filter->seid);
        return ret;
  }
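
Rather than a switch over every supported field combination, the add/delete path above indexes a sparse, statically sized table with the filter's flag bits and rejects any value past the table bounds before touching it. A user-space sketch of that lookup pattern, using hypothetical flag and firmware type values (the real I40E_CLOUD_FILTER_FLAGS_* and I40E_AQC_ADD_CLOUD_FILTER_* constants are defined elsewhere in the driver); unlike the driver, the sketch also rejects combinations that map to no table entry:

#include <stdio.h>
#include <stdint.h>

/* hypothetical flag bits describing which fields a filter matches on */
#define FLT_OMAC  0x01
#define FLT_IMAC  0x02
#define FLT_IVLAN 0x04

/* hypothetical firmware filter-type codes */
enum { FW_TYPE_OMAC = 1, FW_TYPE_IMAC = 2, FW_TYPE_IMAC_IVLAN = 3 };

static const uint16_t flag_table[16] = {
        [FLT_OMAC]             = FW_TYPE_OMAC,
        [FLT_IMAC]             = FW_TYPE_IMAC,
        [FLT_IMAC | FLT_IVLAN] = FW_TYPE_IMAC_IVLAN,
};

static int flags_to_fw_type(unsigned int flags, uint16_t *type)
{
        /* out-of-range or unmapped combinations are configuration errors */
        if (flags >= sizeof(flag_table) / sizeof(flag_table[0]) ||
            !flag_table[flags])
                return -1;
        *type = flag_table[flags];
        return 0;
}

int main(void)
{
        uint16_t type;

        if (!flags_to_fw_type(FLT_IMAC | FLT_IVLAN, &type))
                printf("fw filter type %u\n", type);
        else
                printf("unsupported flag combination\n");
        return 0;
}
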
  
  /**
-  * i40e_init_pf_dcb - Initialize DCB configuration
-  * @pf: PF being configured
+  * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf
+  * @vsi: pointer to VSI
+  * @filter: cloud filter rule
+  * @add: if true, add, if false, delete
   *
-  * Query the current DCB configuration and cache it
-  * in the hardware structure
+  * Add or delete a cloud filter for a specific flow spec using big buffer.
+  * Returns 0 if the filter was successfully added.
   **/
- static int i40e_init_pf_dcb(struct i40e_pf *pf)
+ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+                                            struct i40e_cloud_filter *filter,
+                                            bool add)
  {
-       struct i40e_hw *hw = &pf->hw;
-       int err = 0;
+       struct i40e_aqc_cloud_filters_element_bb cld_filter;
+       struct i40e_pf *pf = vsi->back;
+       int ret;
  
-       /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
-       if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT)
-               goto out;
+       /* Both (src/dst) valid mac_addr are not supported */
+       if ((is_valid_ether_addr(filter->dst_mac) &&
+            is_valid_ether_addr(filter->src_mac)) ||
+           (is_multicast_ether_addr(filter->dst_mac) &&
+            is_multicast_ether_addr(filter->src_mac)))
+               return -EINVAL;
  
-       /* Get the initial DCB configuration */
-       err = i40e_init_dcb(hw);
-       if (!err) {
-               /* Device/Function is not DCBX capable */
-               if ((!hw->func_caps.dcb) ||
-                   (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
-                       dev_info(&pf->pdev->dev,
-                                "DCBX offload is not supported or is disabled for this PF.\n");
-               } else {
-                       /* When status is not DISABLED then DCBX in FW */
-                       pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
-                                      DCB_CAP_DCBX_VER_IEEE;
+       /* Make sure port is specified, otherwise bail out; a channel-specific
+        * cloud filter needs 'L4 port' to be non-zero
+        */
+       if (!filter->dst_port)
+               return -EINVAL;
  
-                       pf->flags |= I40E_FLAG_DCB_CAPABLE;
-                       /* Enable DCB tagging only when more than one TC
-                        * or explicitly disable if only one TC
-                        */
-                       if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
-                               pf->flags |= I40E_FLAG_DCB_ENABLED;
-                       else
-                               pf->flags &= ~I40E_FLAG_DCB_ENABLED;
-                       dev_dbg(&pf->pdev->dev,
-                               "DCBX offload is supported for this PF.\n");
+       /* adding filter using src_port/src_ip is not supported at this stage */
+       if (filter->src_port || filter->src_ipv4 ||
+           !ipv6_addr_any(&filter->ip.v6.src_ip6))
+               return -EINVAL;
+       /* copy element needed to add cloud filter from filter */
+       i40e_set_cld_element(filter, &cld_filter.element);
+       if (is_valid_ether_addr(filter->dst_mac) ||
+           is_valid_ether_addr(filter->src_mac) ||
+           is_multicast_ether_addr(filter->dst_mac) ||
+           is_multicast_ether_addr(filter->src_mac)) {
+               /* MAC + IP : unsupported mode */
+               if (filter->dst_ipv4)
+                       return -EINVAL;
+               /* since we validated that the L4 port is valid before we get
+                * here, start with the corresponding "flags" value and update
+                * it if a vlan is present
+                */
+               cld_filter.element.flags =
+                       cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT);
+               if (filter->vlan_id) {
+                       cld_filter.element.flags =
+                       cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
                }
+       } else if (filter->dst_ipv4 ||
+                  !ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
+               cld_filter.element.flags =
+                               cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT);
+               if (filter->n_proto == ETH_P_IPV6)
+                       cld_filter.element.flags |=
+                               cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+               else
+                       cld_filter.element.flags |=
+                               cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
        } else {
-               dev_info(&pf->pdev->dev,
-                        "Query for DCB configuration failed, err %s aq_err %s\n",
-                        i40e_stat_str(&pf->hw, err),
-                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               dev_err(&pf->pdev->dev,
+                       "either mac or ip has to be valid for cloud filter\n");
+               return -EINVAL;
        }
  
- out:
-       return err;
+       /* Now copy the L4 port into bytes 6..7 of the general fields */
+       cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
+                                               be16_to_cpu(filter->dst_port);
+       if (add) {
+               /* Validate current device switch mode, change if necessary */
+               ret = i40e_validate_and_set_switch_mode(vsi);
+               if (ret) {
+                       dev_err(&pf->pdev->dev,
+                               "failed to set switch mode, ret %d\n",
+                               ret);
+                       return ret;
+               }
+               ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid,
+                                                  &cld_filter, 1);
+       } else {
+               ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid,
+                                                  &cld_filter, 1);
+       }
+       if (ret)
+               dev_dbg(&pf->pdev->dev,
+                       "Failed to %s cloud filter(big buffer) err %d aq_err %d\n",
+                       add ? "add" : "delete", ret, pf->hw.aq.asq_last_status);
+       else
+               dev_info(&pf->pdev->dev,
+                        "%s cloud filter for VSI: %d, L4 port: %d\n",
+                        add ? "add" : "delete", filter->seid,
+                        ntohs(filter->dst_port));
+       return ret;
  }
- #endif /* CONFIG_I40E_DCB */
- #define SPEED_SIZE 14
- #define FC_SIZE 8
  /**
-  * i40e_print_link_message - print link up or down
-  * @vsi: the VSI for which link needs a message
-  */
- void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+  * i40e_parse_cls_flower - Parse tc flower filters provided by kernel
+  * @vsi: Pointer to VSI
+  * @f: Pointer to struct tc_cls_flower_offload
+  * @filter: Pointer to cloud filter structure
+  *
+  **/
+ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
+                                struct tc_cls_flower_offload *f,
+                                struct i40e_cloud_filter *filter)
  {
-       enum i40e_aq_link_speed new_speed;
-       char *speed = "Unknown";
-       char *fc = "Unknown";
-       char *fec = "";
-       char *req_fec = "";
-       char *an = "";
+       u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
+       struct i40e_pf *pf = vsi->back;
+       u8 field_flags = 0;
+       if (f->dissector->used_keys &
+           ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+             BIT(FLOW_DISSECTOR_KEY_BASIC) |
+             BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_VLAN) |
+             BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_PORTS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+               dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n",
+                       f->dissector->used_keys);
+               return -EOPNOTSUPP;
+       }
  
-       new_speed = vsi->back->hw.phy.link_info.link_speed;
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+               struct flow_dissector_key_keyid *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                                 f->key);
  
-       if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
-               return;
-       vsi->current_isup = isup;
-       vsi->current_speed = new_speed;
-       if (!isup) {
-               netdev_info(vsi->netdev, "NIC Link is Down\n");
-               return;
+               struct flow_dissector_key_keyid *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                                 f->mask);
+               if (mask->keyid != 0)
+                       field_flags |= I40E_CLOUD_FIELD_TEN_ID;
+               filter->tenant_id = be32_to_cpu(key->keyid);
        }
  
-       /* Warn user if link speed on NPAR enabled partition is not at
-        * least 10GB
-        */
-       if (vsi->back->hw.func_caps.npar_enable &&
-           (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
-            vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
-               netdev_warn(vsi->netdev,
-                           "The partition detected link speed that is less than 10Gbps\n");
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               struct flow_dissector_key_basic *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->key);
  
-       switch (vsi->back->hw.phy.link_info.link_speed) {
-       case I40E_LINK_SPEED_40GB:
-               speed = "40 G";
-               break;
-       case I40E_LINK_SPEED_20GB:
-               speed = "20 G";
-               break;
-       case I40E_LINK_SPEED_25GB:
-               speed = "25 G";
-               break;
-       case I40E_LINK_SPEED_10GB:
-               speed = "10 G";
-               break;
-       case I40E_LINK_SPEED_1GB:
-               speed = "1000 M";
-               break;
-       case I40E_LINK_SPEED_100MB:
-               speed = "100 M";
-               break;
-       default:
-               break;
+               struct flow_dissector_key_basic *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 f->mask);
+               n_proto_key = ntohs(key->n_proto);
+               n_proto_mask = ntohs(mask->n_proto);
+               if (n_proto_key == ETH_P_ALL) {
+                       n_proto_key = 0;
+                       n_proto_mask = 0;
+               }
+               filter->n_proto = n_proto_key & n_proto_mask;
+               filter->ip_proto = key->ip_proto;
        }
  
-       switch (vsi->back->hw.fc.current_mode) {
-       case I40E_FC_FULL:
-               fc = "RX/TX";
-               break;
-       case I40E_FC_TX_PAUSE:
-               fc = "TX";
-               break;
-       case I40E_FC_RX_PAUSE:
-               fc = "RX";
-               break;
-       default:
-               fc = "None";
-               break;
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+               struct flow_dissector_key_eth_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_eth_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                                 f->mask);
+               /* use is_broadcast and is_zero to check for all 0xff or 0 */
+               if (!is_zero_ether_addr(mask->dst)) {
+                       if (is_broadcast_ether_addr(mask->dst)) {
+                               field_flags |= I40E_CLOUD_FIELD_OMAC;
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
+                                       mask->dst);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               if (!is_zero_ether_addr(mask->src)) {
+                       if (is_broadcast_ether_addr(mask->src)) {
+                               field_flags |= I40E_CLOUD_FIELD_IMAC;
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
+                                       mask->src);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               ether_addr_copy(filter->dst_mac, key->dst);
+               ether_addr_copy(filter->src_mac, key->src);
        }
  
-       if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
-               req_fec = ", Requested FEC: None";
-               fec = ", FEC: None";
-               an = ", Autoneg: False";
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+               struct flow_dissector_key_vlan *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->key);
+               struct flow_dissector_key_vlan *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->mask);
  
-               if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
-                       an = ", Autoneg: True";
+               if (mask->vlan_id) {
+                       if (mask->vlan_id == VLAN_VID_MASK) {
+                               field_flags |= I40E_CLOUD_FIELD_IVLAN;
  
-               if (vsi->back->hw.phy.link_info.fec_info &
-                   I40E_AQ_CONFIG_FEC_KR_ENA)
-                       fec = ", FEC: CL74 FC-FEC/BASE-R";
-               else if (vsi->back->hw.phy.link_info.fec_info &
-                        I40E_AQ_CONFIG_FEC_RS_ENA)
-                       fec = ", FEC: CL108 RS-FEC";
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
+                                       mask->vlan_id);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
  
-               /* 'CL108 RS-FEC' should be displayed when RS is requested, or
-                * both RS and FC are requested
+               filter->vlan_id = cpu_to_be16(key->vlan_id);
+       }
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+               struct flow_dissector_key_control *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_CONTROL,
+                                                 f->key);
+               addr_type = key->addr_type;
+       }
+       if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+               struct flow_dissector_key_ipv4_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_ipv4_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+                                                 f->mask);
+               if (mask->dst) {
+                       if (mask->dst == cpu_to_be32(0xffffffff)) {
+                               field_flags |= I40E_CLOUD_FIELD_IIP;
+                       } else {
+                               mask->dst = be32_to_cpu(mask->dst);
+                               dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n",
+                                       &mask->dst);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               if (mask->src) {
+                       if (mask->src == cpu_to_be32(0xffffffff)) {
+                               field_flags |= I40E_CLOUD_FIELD_IIP;
+                       } else {
+                               mask->src = be32_to_cpu(mask->src);
+                               dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n",
+                                       &mask->src);
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               if (field_flags & I40E_CLOUD_FIELD_TEN_ID) {
+                       dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
+                       return I40E_ERR_CONFIG;
+               }
+               filter->dst_ipv4 = key->dst;
+               filter->src_ipv4 = key->src;
+       }
+       if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+               struct flow_dissector_key_ipv6_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_ipv6_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+                                                 f->mask);
+               /* src and dest IPv6 addresses should not be LOOPBACK
+                * (0:0:0:0:0:0:0:1), which can be represented as ::1
                 */
-               if (vsi->back->hw.phy.link_info.req_fec_info &
-                   (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
-                       if (vsi->back->hw.phy.link_info.req_fec_info &
-                           I40E_AQ_REQUEST_FEC_RS)
-                               req_fec = ", Requested FEC: CL108 RS-FEC";
-                       else
-                               req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+               if (ipv6_addr_loopback(&key->dst) ||
+                   ipv6_addr_loopback(&key->src)) {
+                       dev_err(&pf->pdev->dev,
+                               "Bad ipv6, addr is LOOPBACK\n");
+                       return I40E_ERR_CONFIG;
+               }
+               if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+                       field_flags |= I40E_CLOUD_FIELD_IIP;
+               memcpy(&filter->src_ipv6, &key->src.s6_addr32,
+                      sizeof(filter->src_ipv6));
+               memcpy(&filter->dst_ipv6, &key->dst.s6_addr32,
+                      sizeof(filter->dst_ipv6));
+       }
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+               struct flow_dissector_key_ports *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_PORTS,
+                                                 f->key);
+               struct flow_dissector_key_ports *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_PORTS,
+                                                 f->mask);
+               if (mask->src) {
+                       if (mask->src == cpu_to_be16(0xffff)) {
+                               field_flags |= I40E_CLOUD_FIELD_IIP;
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
+                                       be16_to_cpu(mask->src));
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               if (mask->dst) {
+                       if (mask->dst == cpu_to_be16(0xffff)) {
+                               field_flags |= I40E_CLOUD_FIELD_IIP;
+                       } else {
+                               dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
+                                       be16_to_cpu(mask->dst));
+                               return I40E_ERR_CONFIG;
+                       }
+               }
+               filter->dst_port = key->dst;
+               filter->src_port = key->src;
+               switch (filter->ip_proto) {
+               case IPPROTO_TCP:
+               case IPPROTO_UDP:
+                       break;
+               default:
+                       dev_err(&pf->pdev->dev,
+                               "Only UDP and TCP transport are supported\n");
+                       return -EINVAL;
                }
        }
+       filter->flags = field_flags;
+       return 0;
+ }
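
Most of the dissector handling above applies one rule per field: an all-ones mask means the hardware should match the field exactly, a zero mask means the field is ignored, and any partial mask is rejected because the cloud filters cannot express it. A minimal user-space sketch of that rule for a MAC-address field, with a hypothetical field flag (the FLOW_DISSECTOR_* and is_*_ether_addr() interfaces are kernel-only):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define FIELD_DST_MAC 0x01  /* hypothetical "match on dst MAC" flag */

/* Return 0 and update *field_flags for a usable mask,
 * -1 for a partial mask the hardware cannot express.
 */
static int check_eth_mask(const uint8_t mask[6], unsigned int *field_flags)
{
        static const uint8_t zero[6];
        static const uint8_t bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

        if (!memcmp(mask, zero, sizeof(zero)))
                return 0;                       /* field not used */
        if (!memcmp(mask, bcast, sizeof(bcast))) {
                *field_flags |= FIELD_DST_MAC;  /* exact match requested */
                return 0;
        }
        return -1;                              /* partial masks unsupported */
}

int main(void)
{
        uint8_t mask[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
        unsigned int flags = 0;

        printf("partial mask -> %s\n",
               check_eth_mask(mask, &flags) ? "rejected" : "accepted");
        return 0;
}
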
  
-       netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
-                   speed, req_fec, fec, an, fc);
+ /**
+  * i40e_handle_tclass - Forward to a traffic class on the device
+  * @vsi: Pointer to VSI
+  * @tc: traffic class index on the device
+  * @filter: Pointer to cloud filter structure
+  *
+  **/
+ static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc,
+                             struct i40e_cloud_filter *filter)
+ {
+       struct i40e_channel *ch, *ch_tmp;
+       /* direct to a traffic class on the same device */
+       if (tc == 0) {
+               filter->seid = vsi->seid;
+               return 0;
+       } else if (vsi->tc_config.enabled_tc & BIT(tc)) {
+               if (!filter->dst_port) {
+                       dev_err(&vsi->back->pdev->dev,
+                               "Specify destination port to direct to traffic class that is not default\n");
+                       return -EINVAL;
+               }
+               if (list_empty(&vsi->ch_list))
+                       return -EINVAL;
+               list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list,
+                                        list) {
+                       if (ch->seid == vsi->tc_seid_map[tc])
+                               filter->seid = ch->seid;
+               }
+               return 0;
+       }
+       dev_err(&vsi->back->pdev->dev, "TC is not enabled\n");
+       return -EINVAL;
  }
  
  /**
-  * i40e_up_complete - Finish the last steps of bringing up a connection
-  * @vsi: the VSI being configured
+  * i40e_configure_clsflower - Configure tc flower filters
+  * @vsi: Pointer to VSI
+  * @cls_flower: Pointer to struct tc_cls_flower_offload
+  *
   **/
- static int i40e_up_complete(struct i40e_vsi *vsi)
+ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
+                                   struct tc_cls_flower_offload *cls_flower)
  {
+       int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
+       struct i40e_cloud_filter *filter = NULL;
        struct i40e_pf *pf = vsi->back;
-       int err;
+       int err = 0;
  
-       if (pf->flags & I40E_FLAG_MSIX_ENABLED)
-               i40e_vsi_configure_msix(vsi);
-       else
-               i40e_configure_msi_and_legacy(vsi);
+       if (tc < 0) {
+               dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
+               return -EINVAL;
+       }
  
-       /* start rings */
-       err = i40e_vsi_start_rings(vsi);
-       if (err)
-               return err;
+       if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+           test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+               return -EBUSY;
  
-       clear_bit(__I40E_VSI_DOWN, vsi->state);
-       i40e_napi_enable_all(vsi);
-       i40e_vsi_enable_irq(vsi);
+       if (pf->fdir_pf_active_filters ||
+           (!hlist_empty(&pf->fdir_filter_list))) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Flow Director Sideband filters exist, turn ntuple off to configure cloud filters\n");
+               return -EINVAL;
+       }
  
-       if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
-           (vsi->netdev)) {
-               i40e_print_link_message(vsi, true);
-               netif_tx_start_all_queues(vsi->netdev);
-               netif_carrier_on(vsi->netdev);
-       } else if (vsi->netdev) {
-               i40e_print_link_message(vsi, false);
-               /* need to check for qualified module here*/
-               if ((pf->hw.phy.link_info.link_info &
-                       I40E_AQ_MEDIA_AVAILABLE) &&
-                   (!(pf->hw.phy.link_info.an_info &
-                       I40E_AQ_QUALIFIED_MODULE)))
-                       netdev_err(vsi->netdev,
-                                  "the driver failed to link because an unqualified module was detected.");
+       if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
+               vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+               vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
        }
  
-       /* replay FDIR SB filters */
-       if (vsi->type == I40E_VSI_FDIR) {
-               /* reset fd counters */
-               pf->fd_add_err = 0;
-               pf->fd_atr_cnt = 0;
-               i40e_fdir_filter_restore(vsi);
+       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       if (!filter)
+               return -ENOMEM;
+       filter->cookie = cls_flower->cookie;
+       err = i40e_parse_cls_flower(vsi, cls_flower, filter);
+       if (err < 0)
+               goto err;
+       err = i40e_handle_tclass(vsi, tc, filter);
+       if (err < 0)
+               goto err;
+       /* Add cloud filter */
+       if (filter->dst_port)
+               err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true);
+       else
+               err = i40e_add_del_cloud_filter(vsi, filter, true);
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to add cloud filter, err %s\n",
+                       i40e_stat_str(&pf->hw, err));
+               err = i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
+               goto err;
        }
  
-       /* On the next run of the service_task, notify any clients of the new
-        * opened netdev
-        */
-       pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
-       i40e_service_event_schedule(pf);
+       /* add filter to the ordered list */
+       INIT_HLIST_NODE(&filter->cloud_node);
  
-       return 0;
+       hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list);
+       pf->num_cloud_filters++;
+       return err;
+ err:
+       kfree(filter);
+       return err;
  }
  
  /**
-  * i40e_vsi_reinit_locked - Reset the VSI
-  * @vsi: the VSI being configured
+  * i40e_find_cloud_filter - Find the cloud filter in the list
+  * @vsi: Pointer to VSI
+  * @cookie: filter specific cookie
   *
-  * Rebuild the ring structs after some configuration
-  * has changed, e.g. MTU size.
   **/
- static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+ static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi,
+                                                       unsigned long *cookie)
  {
-       struct i40e_pf *pf = vsi->back;
-       WARN_ON(in_interrupt());
-       while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
-               usleep_range(1000, 2000);
-       i40e_down(vsi);
+       struct i40e_cloud_filter *filter = NULL;
+       struct hlist_node *node2;
  
-       i40e_up(vsi);
-       clear_bit(__I40E_CONFIG_BUSY, pf->state);
+       hlist_for_each_entry_safe(filter, node2,
+                                 &vsi->back->cloud_filter_list, cloud_node)
+               if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+                       return filter;
+       return NULL;
  }
  
  /**
-  * i40e_up - Bring the connection back up after being down
-  * @vsi: the VSI being configured
+  * i40e_delete_clsflower - Remove tc flower filters
+  * @vsi: Pointer to VSI
+  * @cls_flower: Pointer to struct tc_cls_flower_offload
+  *
   **/
- int i40e_up(struct i40e_vsi *vsi)
+ static int i40e_delete_clsflower(struct i40e_vsi *vsi,
+                                struct tc_cls_flower_offload *cls_flower)
  {
-       int err;
+       struct i40e_cloud_filter *filter = NULL;
+       struct i40e_pf *pf = vsi->back;
+       int err = 0;
  
-       err = i40e_vsi_configure(vsi);
-       if (!err)
-               err = i40e_up_complete(vsi);
+       filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie);
  
-       return err;
- }
+       if (!filter)
+               return -EINVAL;
  
- /**
-  * i40e_down - Shutdown the connection processing
-  * @vsi: the VSI being stopped
-  **/
- void i40e_down(struct i40e_vsi *vsi)
- {
-       int i;
+       hash_del(&filter->cloud_node);
  
-       /* It is assumed that the caller of this function
-        * sets the vsi->state __I40E_VSI_DOWN bit.
-        */
-       if (vsi->netdev) {
-               netif_carrier_off(vsi->netdev);
-               netif_tx_disable(vsi->netdev);
-       }
-       i40e_vsi_disable_irq(vsi);
-       i40e_vsi_stop_rings(vsi);
-       i40e_napi_disable_all(vsi);
+       if (filter->dst_port)
+               err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false);
+       else
+               err = i40e_add_del_cloud_filter(vsi, filter, false);
  
-       for (i = 0; i < vsi->num_queue_pairs; i++) {
-               i40e_clean_tx_ring(vsi->tx_rings[i]);
-               if (i40e_enabled_xdp_vsi(vsi))
-                       i40e_clean_tx_ring(vsi->xdp_rings[i]);
-               i40e_clean_rx_ring(vsi->rx_rings[i]);
+       kfree(filter);
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to delete cloud filter, err %s\n",
+                       i40e_stat_str(&pf->hw, err));
+               return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
        }
  
+       pf->num_cloud_filters--;
+       if (!pf->num_cloud_filters)
+               if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+                   !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+                       pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+                       pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+                       pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+               }
+       return 0;
  }
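
Deleting a flower rule relies on the cookie the TC core supplied when the rule was installed: the PF's filter list is walked and each stored cookie is compared byte-for-byte until a match is found. A small user-space sketch of that lookup, using a plain singly linked list in place of the kernel hlist and hypothetical cookie values:

#include <stdio.h>
#include <string.h>

struct cloud_filter {
        unsigned long cookie;        /* identifier handed over by the caller */
        struct cloud_filter *next;
};

static struct cloud_filter *find_filter(struct cloud_filter *head,
                                        unsigned long cookie)
{
        struct cloud_filter *f;

        for (f = head; f; f = f->next)
                if (!memcmp(&cookie, &f->cookie, sizeof(f->cookie)))
                        return f;
        return NULL;
}

int main(void)
{
        struct cloud_filter a = { .cookie = 0x1234, .next = NULL };
        struct cloud_filter b = { .cookie = 0x5678, .next = &a };

        printf("lookup 0x1234: %s\n",
               find_filter(&b, 0x1234) ? "found" : "missing");
        return 0;
}
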
  
  /**
-  * i40e_setup_tc - configure multiple traffic classes
+  * i40e_setup_tc_cls_flower - flower classifier offloads
   * @netdev: net device to configure
-  * @tc: number of traffic classes to enable
+  * @type_data: offload data
   **/
- static int i40e_setup_tc(struct net_device *netdev, u8 tc)
+ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
+                                   struct tc_cls_flower_offload *cls_flower)
  {
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
-       struct i40e_pf *pf = vsi->back;
-       u8 enabled_tc = 0;
-       int ret = -EINVAL;
-       int i;
-       /* Check if DCB enabled to continue */
-       if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
-               netdev_info(netdev, "DCB is not enabled for adapter\n");
-               goto exit;
-       }
  
-       /* Check if MFP enabled */
-       if (pf->flags & I40E_FLAG_MFP_ENABLED) {
-               netdev_info(netdev, "Configuring TC not supported in MFP mode\n");
-               goto exit;
-       }
+       if (cls_flower->common.chain_index)
+               return -EOPNOTSUPP;
  
-       /* Check whether tc count is within enabled limit */
-       if (tc > i40e_pf_get_num_tc(pf)) {
-               netdev_info(netdev, "TC count greater than enabled on link for adapter\n");
-               goto exit;
+       switch (cls_flower->command) {
+       case TC_CLSFLOWER_REPLACE:
+               return i40e_configure_clsflower(vsi, cls_flower);
+       case TC_CLSFLOWER_DESTROY:
+               return i40e_delete_clsflower(vsi, cls_flower);
+       case TC_CLSFLOWER_STATS:
+               return -EOPNOTSUPP;
+       default:
+               return -EINVAL;
        }
+ }
  
-       /* Generate TC map for number of tc requested */
-       for (i = 0; i < tc; i++)
-               enabled_tc |= BIT(i);
-       /* Requesting same TC configuration as already enabled */
-       if (enabled_tc == vsi->tc_config.enabled_tc)
-               return 0;
+ static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+                                 void *cb_priv)
+ {
+       struct i40e_netdev_priv *np = cb_priv;
  
-       /* Quiesce VSI queues */
-       i40e_quiesce_vsi(vsi);
+       switch (type) {
+       case TC_SETUP_CLSFLOWER:
+               return i40e_setup_tc_cls_flower(np, type_data);
  
-       /* Configure VSI for enabled TCs */
-       ret = i40e_vsi_config_tc(vsi, enabled_tc);
-       if (ret) {
-               netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
-                           vsi->seid);
-               goto exit;
+       default:
+               return -EOPNOTSUPP;
        }
+ }
  
-       /* Unquiesce VSI */
-       i40e_unquiesce_vsi(vsi);
+ static int i40e_setup_tc_block(struct net_device *dev,
+                              struct tc_block_offload *f)
+ {
+       struct i40e_netdev_priv *np = netdev_priv(dev);
  
- exit:
-       return ret;
+       if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+       switch (f->command) {
+       case TC_BLOCK_BIND:
+               return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb,
+                                            np, np);
+       case TC_BLOCK_UNBIND:
+               tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
  }
  
  static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
                           void *type_data)
  {
-       struct tc_mqprio_qopt *mqprio = type_data;
-       if (type != TC_SETUP_MQPRIO)
+       switch (type) {
+       case TC_SETUP_QDISC_MQPRIO:
+               return i40e_setup_tc(netdev, type_data);
+       case TC_SETUP_BLOCK:
+               return i40e_setup_tc_block(netdev, type_data);
+       default:
                return -EOPNOTSUPP;
-       mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
-       return i40e_setup_tc(netdev, mqprio->num_tc);
+       }
  }
  
  /**
@@@ -5747,7 -7674,7 +7674,7 @@@ err_setup_rx
  err_setup_tx:
        i40e_vsi_free_tx_resources(vsi);
        if (vsi == pf->vsi[pf->lan_vsi])
-               i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+               i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
  
        return err;
  }
@@@ -5809,6 -7736,33 +7736,33 @@@ static void i40e_fdir_filter_exit(struc
                                I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
  }
  
+ /**
+  * i40e_cloud_filter_exit - Cleans up the cloud filters
+  * @pf: Pointer to PF
+  *
+  * This function destroys the hlist where all the cloud filters
+  * were saved.
+  **/
+ static void i40e_cloud_filter_exit(struct i40e_pf *pf)
+ {
+       struct i40e_cloud_filter *cfilter;
+       struct hlist_node *node;
+       hlist_for_each_entry_safe(cfilter, node,
+                                 &pf->cloud_filter_list, cloud_node) {
+               hlist_del(&cfilter->cloud_node);
+               kfree(cfilter);
+       }
+       pf->num_cloud_filters = 0;
+       if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+           !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+               pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+               pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+               pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+       }
+ }
  /**
   * i40e_close - Disables a network interface
   * @netdev: network interface device structure
@@@ -5875,7 -7829,7 +7829,7 @@@ void i40e_do_reset(struct i40e_pf *pf, 
                wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
                i40e_flush(&pf->hw);
  
-       } else if (reset_flags & BIT_ULL(__I40E_PF_RESET_REQUESTED)) {
+       } else if (reset_flags & I40E_PF_RESET_FLAG) {
  
                /* Request a PF Reset
                 *
@@@ -6226,6 -8180,7 +8180,7 @@@ void i40e_fdir_check_and_reenable(struc
                                hlist_del(&filter->fdir_node);
                                kfree(filter);
                                pf->fdir_pf_active_filters--;
+                               pf->fd_inv = 0;
                        }
                }
        }
@@@ -6429,8 -8384,7 +8384,7 @@@ static void i40e_link_event(struct i40e
             new_link == netif_carrier_ok(vsi->netdev)))
                return;
  
-       if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-               i40e_print_link_message(vsi, new_link);
+       i40e_print_link_message(vsi, new_link);
  
        /* Notify the base of the switch tree connected to
         * the link.  Floating VEBs are not notified.
@@@ -6553,12 -8507,26 +8507,26 @@@ static void i40e_handle_link_event(stru
         */
        i40e_link_event(pf);
  
-       /* check for unqualified module, if link is down */
-       if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
-           (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
-           (!(status->link_info & I40E_AQ_LINK_UP)))
+       /* Check if module meets thermal requirements */
+       if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
                dev_err(&pf->pdev->dev,
-                       "The driver failed to link because an unqualified module was detected.\n");
+                       "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
+               dev_err(&pf->pdev->dev,
+                       "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+       } else {
+               /* Check for an unqualified module if the link is down; suppress
+                * the message if the link was forced down.
+                */
+               if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+                   (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
+                   (!(status->link_info & I40E_AQ_LINK_UP)) &&
+                   (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
+                       dev_err(&pf->pdev->dev,
+                               "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
+                       dev_err(&pf->pdev->dev,
+                               "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+               }
+       }
  }
  
  /**
@@@ -6900,7 -8868,8 +8868,8 @@@ end_reconstitute
   * i40e_get_capabilities - get info about the HW
   * @pf: the PF struct
   **/
- static int i40e_get_capabilities(struct i40e_pf *pf)
+ static int i40e_get_capabilities(struct i40e_pf *pf,
+                                enum i40e_admin_queue_opc list_type)
  {
        struct i40e_aqc_list_capabilities_element_resp *cap_buf;
        u16 data_size;
  
                /* this loads the data into the hw struct for us */
                err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
-                                           &data_size,
-                                           i40e_aqc_opc_list_func_capabilities,
-                                           NULL);
+                                                   &data_size, list_type,
+                                                   NULL);
                /* data loaded, buffer no longer needed */
                kfree(cap_buf);
  
                }
        } while (err);
  
-       if (pf->hw.debug_mask & I40E_DEBUG_USER)
-               dev_info(&pf->pdev->dev,
-                        "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
-                        pf->hw.pf_id, pf->hw.func_caps.num_vfs,
-                        pf->hw.func_caps.num_msix_vectors,
-                        pf->hw.func_caps.num_msix_vectors_vf,
-                        pf->hw.func_caps.fd_filters_guaranteed,
-                        pf->hw.func_caps.fd_filters_best_effort,
-                        pf->hw.func_caps.num_tx_qp,
-                        pf->hw.func_caps.num_vsis);
+       if (pf->hw.debug_mask & I40E_DEBUG_USER) {
+               if (list_type == i40e_aqc_opc_list_func_capabilities) {
+                       dev_info(&pf->pdev->dev,
+                                "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
+                                pf->hw.pf_id, pf->hw.func_caps.num_vfs,
+                                pf->hw.func_caps.num_msix_vectors,
+                                pf->hw.func_caps.num_msix_vectors_vf,
+                                pf->hw.func_caps.fd_filters_guaranteed,
+                                pf->hw.func_caps.fd_filters_best_effort,
+                                pf->hw.func_caps.num_tx_qp,
+                                pf->hw.func_caps.num_vsis);
+               } else if (list_type == i40e_aqc_opc_list_dev_capabilities) {
+                       dev_info(&pf->pdev->dev,
+                                "switch_mode=0x%04x, function_valid=0x%08x\n",
+                                pf->hw.dev_caps.switch_mode,
+                                pf->hw.dev_caps.valid_functions);
+                       dev_info(&pf->pdev->dev,
+                                "SR-IOV=%d, num_vfs for all function=%u\n",
+                                pf->hw.dev_caps.sr_iov_1_1,
+                                pf->hw.dev_caps.num_vfs);
+                       dev_info(&pf->pdev->dev,
+                                "num_vsis=%u, num_rx:%u, num_tx=%u\n",
+                                pf->hw.dev_caps.num_vsis,
+                                pf->hw.dev_caps.num_rx_qp,
+                                pf->hw.dev_caps.num_tx_qp);
+               }
+       }
+       if (list_type == i40e_aqc_opc_list_func_capabilities) {
  #define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \
                       + pf->hw.func_caps.num_vfs)
-       if (pf->hw.revision_id == 0 && (DEF_NUM_VSI > pf->hw.func_caps.num_vsis)) {
-               dev_info(&pf->pdev->dev,
-                        "got num_vsis %d, setting num_vsis to %d\n",
-                        pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
-               pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+               if (pf->hw.revision_id == 0 &&
+                   pf->hw.func_caps.num_vsis < DEF_NUM_VSI) {
+                       dev_info(&pf->pdev->dev,
+                                "got num_vsis %d, setting num_vsis to %d\n",
+                                pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
+                       pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+               }
        }
        return 0;
  }
  
@@@ -6985,35 -8971,125 +8971,125 @@@ static void i40e_fdir_sb_setup(struct i
        if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
                return;
  
-       /* find existing VSI and see if it needs configuring */
-       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+       /* find existing VSI and see if it needs configuring */
+       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+       /* create a new VSI if none exists */
+       if (!vsi) {
+               vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
+                                    pf->vsi[pf->lan_vsi]->seid, 0);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
+                       pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+                       pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+                       return;
+               }
+       }
+       i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
+ }
+ /**
+  * i40e_fdir_teardown - release the Flow Director resources
+  * @pf: board private structure
+  **/
+ static void i40e_fdir_teardown(struct i40e_pf *pf)
+ {
+       struct i40e_vsi *vsi;
+       i40e_fdir_filter_exit(pf);
+       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+       if (vsi)
+               i40e_vsi_release(vsi);
+ }
+ /**
+  * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs
+  * @vsi: PF main vsi
+  * @seid: seid of main or channel VSIs
+  *
+  * Rebuilds cloud filters associated with main VSI and channel VSIs if they
+  * existed before reset
+  **/
+ static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
+ {
+       struct i40e_cloud_filter *cfilter;
+       struct i40e_pf *pf = vsi->back;
+       struct hlist_node *node;
+       i40e_status ret;
+       /* Add cloud filters back if they exist */
+       hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
+                                 cloud_node) {
+               if (cfilter->seid != seid)
+                       continue;
+               if (cfilter->dst_port)
+                       ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+                                                               true);
+               else
+                       ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
  
-       /* create a new VSI if none exists */
-       if (!vsi) {
-               vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
-                                    pf->vsi[pf->lan_vsi]->seid, 0);
-               if (!vsi) {
-                       dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
-                       pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
-                       return;
+               if (ret) {
+                       dev_dbg(&pf->pdev->dev,
+                               "Failed to rebuild cloud filter, err %s aq_err %s\n",
+                               i40e_stat_str(&pf->hw, ret),
+                               i40e_aq_str(&pf->hw,
+                                           pf->hw.aq.asq_last_status));
+                       return ret;
                }
        }
-       i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
+       return 0;
  }
  
  /**
-  * i40e_fdir_teardown - release the Flow Director resources
-  * @pf: board private structure
+  * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
+  * @vsi: PF main vsi
+  *
+  * Rebuilds channel VSIs if they existed before reset
   **/
- static void i40e_fdir_teardown(struct i40e_pf *pf)
+ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
  {
-       struct i40e_vsi *vsi;
+       struct i40e_channel *ch, *ch_tmp;
+       i40e_status ret;
  
-       i40e_fdir_filter_exit(pf);
-       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
-       if (vsi)
-               i40e_vsi_release(vsi);
+       if (list_empty(&vsi->ch_list))
+               return 0;
+       list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+               if (!ch->initialized)
+                       break;
+               /* Proceed with creation of channel (VMDq2) VSI */
+               ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch);
+               if (ret) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "failed to rebuild channels using uplink_seid %u\n",
+                                vsi->uplink_seid);
+                       return ret;
+               }
+               if (ch->max_tx_rate) {
+                       u64 credits = ch->max_tx_rate;
+                       if (i40e_set_bw_limit(vsi, ch->seid,
+                                             ch->max_tx_rate))
+                               return -EINVAL;
+                       do_div(credits, I40E_BW_CREDIT_DIVISOR);
+                       dev_dbg(&vsi->back->pdev->dev,
+                               "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+                               ch->max_tx_rate,
+                               credits,
+                               ch->seid);
+               }
+               ret = i40e_rebuild_cloud_filters(vsi, ch->seid);
+               if (ret) {
+                       dev_dbg(&vsi->back->pdev->dev,
+                               "Failed to rebuild cloud filters for channel VSI %u\n",
+                               ch->seid);
+                       return ret;
+               }
+       }
+       return 0;
  }
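
The credit computation above copies max_tx_rate first because do_div() divides the 64-bit dividend in place and only returns the remainder. As a worked example, assuming I40E_BW_CREDIT_DIVISOR is 50 (the "count of 50Mbps" wording in the log message suggests this, but the constant itself is defined outside this hunk), a 500 Mbps cap works out to 10 credits:

static void example_credits(void)
{
	u64 credits = 500;			/* Mbps cap, illustrative value */
	u32 remainder;

	remainder = do_div(credits, 50);	/* assumed 50 Mbps per credit */

	/* credits is now 10, remainder is 0 */
	pr_debug("credits=%llu remainder=%u\n", credits, remainder);
}
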
  
  /**
@@@ -7152,6 -9228,7 +9228,7 @@@ static int i40e_reset(struct i40e_pf *p
   **/
  static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
  {
+       struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
        struct i40e_hw *hw = &pf->hw;
        u8 set_fc_aq_fail = 0;
        i40e_status ret;
                i40e_verify_eeprom(pf);
  
        i40e_clear_pxe_mode(hw);
-       ret = i40e_get_capabilities(pf);
+       ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
        if (ret)
                goto end_core_reset;
  
         * If there were VEBs but the reconstitution failed, we'll try
         * to recover minimal use by getting the basic PF VSI working.
         */
-       if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) {
+       if (vsi->uplink_seid != pf->mac_seid) {
                dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
                /* find the one VEB connected to the MAC, and find orphans */
                for (v = 0; v < I40E_MAX_VEB; v++) {
                                        dev_info(&pf->pdev->dev,
                                                 "rebuild of switch failed: %d, will try to set up simple PF connection\n",
                                                 ret);
-                                       pf->vsi[pf->lan_vsi]->uplink_seid
-                                                               = pf->mac_seid;
+                                       vsi->uplink_seid = pf->mac_seid;
                                        break;
                                } else if (pf->veb[v]->uplink_seid == 0) {
                                        dev_info(&pf->pdev->dev,
                }
        }
  
-       if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) {
+       if (vsi->uplink_seid == pf->mac_seid) {
                dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
                /* no VEB, so rebuild only the Main VSI */
-               ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]);
+               ret = i40e_add_vsi(vsi);
                if (ret) {
                        dev_info(&pf->pdev->dev,
                                 "rebuild of Main VSI failed: %d\n", ret);
                }
        }
  
+       if (vsi->mqprio_qopt.max_rate[0]) {
+               u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+               u64 credits = 0;
+               do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+               ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+               if (ret)
+                       goto end_unlock;
+               credits = max_tx_rate;
+               do_div(credits, I40E_BW_CREDIT_DIVISOR);
+               dev_dbg(&vsi->back->pdev->dev,
+                       "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+                       max_tx_rate,
+                       credits,
+                       vsi->seid);
+       }
+       ret = i40e_rebuild_cloud_filters(vsi, vsi->seid);
+       if (ret)
+               goto end_unlock;
+       /* PF Main VSI is rebuilt by now, go ahead and rebuild channel VSIs
+        * for this main VSI if they exist
+        */
+       ret = i40e_rebuild_channels(vsi);
+       if (ret)
+               goto end_unlock;
        /* Reconfigure hardware for allowing smaller MSS in the case
         * of TSO, so that we avoid the MDD being fired and causing
         * a reset in the case of small MSS+TSO.
@@@ -7615,9 -9720,9 +9720,9 @@@ static void i40e_service_task(struct wo
   * i40e_service_timer - timer callback
   * @data: pointer to PF struct
   **/
- static void i40e_service_timer(unsigned long data)
+ static void i40e_service_timer(struct timer_list *t)
  {
-       struct i40e_pf *pf = (struct i40e_pf *)data;
+       struct i40e_pf *pf = from_timer(pf, t, service_timer);
  
        mod_timer(&pf->service_timer,
                  round_jiffies(jiffies + pf->service_timer_period));
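
This is the timer-API conversion pattern used throughout this merge: the callback now receives the struct timer_list pointer and recovers its container with from_timer(), and registration goes through timer_setup() instead of setup_timer() with an unsigned long cookie. A minimal sketch with hypothetical names:

struct example_pf {
	struct timer_list service_timer;
	unsigned long service_timer_period;
};

static void example_service_timer(struct timer_list *t)
{
	struct example_pf *pf = from_timer(pf, t, service_timer);

	/* re-arm the periodic timer, mirroring the handler above */
	mod_timer(&pf->service_timer,
		  round_jiffies(jiffies + pf->service_timer_period));
}

/* at init time: timer_setup(&pf->service_timer, example_service_timer, 0); */
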
@@@ -7674,7 -9779,7 +9779,7 @@@ static int i40e_set_num_rings_in_vsi(st
  
  /**
   * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
-  * @type: VSI pointer
+  * @vsi: VSI pointer
   * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
   *
   * On error: returns error code (negative)
@@@ -8139,7 -10244,7 +10244,7 @@@ static int i40e_init_msix(struct i40e_p
                pf->num_lan_qps = 1;
                pf->num_lan_msix = 1;
  
-       } else if (!vectors_left) {
+       } else if (v_actual != v_budget) {
                /* If we have limited resources, we will start with no vectors
                 * for the special features and then allocate vectors to some
                 * of these features based on the policy and at the end disable
                int vec;
  
                dev_info(&pf->pdev->dev,
-                        "MSI-X vector limit reached, attempting to redistribute vectors\n");
+                        "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n",
+                        v_actual, v_budget);
                /* reserve the misc vector */
                vec = v_actual - 1;
  
            (pf->num_fdsb_msix == 0)) {
                dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
                pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+               pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
        }
        if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
            (pf->num_vmdq_msix == 0)) {
@@@ -8313,6 -10420,7 +10420,7 @@@ static int i40e_init_interrupt_scheme(s
                                       I40E_FLAG_FD_SB_ENABLED  |
                                       I40E_FLAG_FD_ATR_ENABLED |
                                       I40E_FLAG_VMDQ_ENABLED);
+                       pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
  
                        /* rework the queue expectations without MSIX */
                        i40e_determine_queue_usage(pf);
        return 0;
  }
  
+ /**
+  * i40e_restore_interrupt_scheme - Restore the interrupt scheme
+  * @pf: private board data structure
+  *
+  * Restore the interrupt scheme that was cleared when we suspended the
+  * device. This should be called during resume to re-allocate the q_vectors
+  * and reacquire IRQs.
+  */
+ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
+ {
+       int err, i;
+       /* We cleared the MSI and MSI-X flags when disabling the old interrupt
+        * scheme. We need to re-enable them here in order to attempt to
+        * re-acquire the MSI or MSI-X vectors.
+        */
+       pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+       err = i40e_init_interrupt_scheme(pf);
+       if (err)
+               return err;
+       /* Now that we've re-acquired IRQs, we need to remap the vectors and
+        * rings together again.
+        */
+       for (i = 0; i < pf->num_alloc_vsi; i++) {
+               if (pf->vsi[i]) {
+                       err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
+                       if (err)
+                               goto err_unwind;
+                       i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
+               }
+       }
+       err = i40e_setup_misc_vector(pf);
+       if (err)
+               goto err_unwind;
+       return 0;
+ err_unwind:
+       while (i--) {
+               if (pf->vsi[i])
+                       i40e_vsi_free_q_vectors(pf->vsi[i]);
+       }
+       return err;
+ }
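
The err_unwind label relies on the fact that i holds the index of the first VSI that failed, so while (i--) walks back over only the q_vectors that were successfully allocated. A self-contained, purely illustrative user-space sketch of the same roll-back idiom:

#include <stdio.h>

#define N 4

static int setup_one(int i)
{
	return (i == 2) ? -1 : 0;	/* simulate a failure on entry 2 */
}

static void teardown_one(int i)
{
	printf("tearing down %d\n", i);
}

int main(void)
{
	int err = 0, i;

	for (i = 0; i < N; i++) {
		err = setup_one(i);
		if (err)
			goto err_unwind;
	}
	return 0;

err_unwind:
	while (i--)			/* unwinds entries 1 and 0, not 2 */
		teardown_one(i);
	return err;
}
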
  /**
   * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
   * @pf: board private structure
@@@ -8363,13 -10520,12 +10520,12 @@@ static int i40e_setup_misc_vector(struc
        struct i40e_hw *hw = &pf->hw;
        int err = 0;
  
-       /* Only request the irq if this is the first time through, and
-        * not when we're rebuilding after a Reset
-        */
-       if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) {
+       /* Only request the IRQ once, the first time through. */
+       if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
                err = request_irq(pf->msix_entries[0].vector,
                                  i40e_intr, 0, pf->int_name, pf);
                if (err) {
+                       clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
                        dev_info(&pf->pdev->dev,
                                 "request_irq for %s failed: %d\n",
                                 pf->int_name, err);
  
        i40e_flush(hw);
  
-       i40e_irq_dynamic_enable_icr0(pf, true);
+       i40e_irq_dynamic_enable_icr0(pf);
  
        return err;
  }
  
- /**
-  * i40e_config_rss_aq - Prepare for RSS using AQ commands
-  * @vsi: vsi structure
-  * @seed: RSS hash seed
-  **/
- static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
-                             u8 *lut, u16 lut_size)
- {
-       struct i40e_pf *pf = vsi->back;
-       struct i40e_hw *hw = &pf->hw;
-       int ret = 0;
-       if (seed) {
-               struct i40e_aqc_get_set_rss_key_data *seed_dw =
-                       (struct i40e_aqc_get_set_rss_key_data *)seed;
-               ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
-               if (ret) {
-                       dev_info(&pf->pdev->dev,
-                                "Cannot set RSS key, err %s aq_err %s\n",
-                                i40e_stat_str(hw, ret),
-                                i40e_aq_str(hw, hw->aq.asq_last_status));
-                       return ret;
-               }
-       }
-       if (lut) {
-               bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
-               ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
-               if (ret) {
-                       dev_info(&pf->pdev->dev,
-                                "Cannot set RSS lut, err %s aq_err %s\n",
-                                i40e_stat_str(hw, ret),
-                                i40e_aq_str(hw, hw->aq.asq_last_status));
-                       return ret;
-               }
-       }
-       return ret;
- }
  /**
   * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
   * @vsi: Pointer to vsi structure
@@@ -8475,46 -10592,6 +10592,6 @@@ static int i40e_get_rss_aq(struct i40e_
        return ret;
  }
  
- /**
-  * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
-  * @vsi: VSI structure
-  **/
- static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
- {
-       u8 seed[I40E_HKEY_ARRAY_SIZE];
-       struct i40e_pf *pf = vsi->back;
-       u8 *lut;
-       int ret;
-       if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
-               return 0;
-       if (!vsi->rss_size)
-               vsi->rss_size = min_t(int, pf->alloc_rss_size,
-                                     vsi->num_queue_pairs);
-       if (!vsi->rss_size)
-               return -EINVAL;
-       lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
-       if (!lut)
-               return -ENOMEM;
-       /* Use the user configured hash keys and lookup table if there is one,
-        * otherwise use default
-        */
-       if (vsi->rss_lut_user)
-               memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
-       else
-               i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
-       if (vsi->rss_hkey_user)
-               memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
-       else
-               netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
-       ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
-       kfree(lut);
-       return ret;
- }
  /**
   * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
   * @vsi: Pointer to vsi structure
@@@ -8913,8 -10990,8 +10990,8 @@@ static int i40e_sw_init(struct i40e_pf 
                    I40E_FLAG_MSIX_ENABLED;
  
        /* Set default ITR */
-       pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
-       pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF;
+       pf->rx_itr_default = I40E_ITR_RX_DEF;
+       pf->tx_itr_default = I40E_ITR_TX_DEF;
  
        /* Depending on PF configurations, it is possible that the RSS
         * maximum might end up larger than the available queues
            (pf->hw.aq.fw_maj_ver >= 5)))
                pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
  
+       /* Enable PTP L4 if FW > v6.0 */
+       if (pf->hw.mac.type == I40E_MAC_XL710 &&
+           pf->hw.aq.fw_maj_ver >= 6)
+               pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
        if (pf->hw.func_caps.vmdq) {
                pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
                pf->flags |= I40E_FLAG_VMDQ_ENABLED;
@@@ -9079,9 -11161,13 +11161,13 @@@ bool i40e_set_ntuple(struct i40e_pf *pf
                /* Enable filters and mark for reset */
                if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
                        need_reset = true;
-               /* enable FD_SB only if there is MSI-X vector */
-               if (pf->num_fdsb_msix > 0)
+               /* enable FD_SB only if there is an MSI-X vector and no cloud
+                * filters exist
+                */
+               if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
                        pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+                       pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+               }
        } else {
                /* turn off filters, mark for reset and clear SW filter list */
                if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
                }
                pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED |
                               I40E_FLAG_FD_SB_AUTO_DISABLED);
+               pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
                /* reset fd counters */
                pf->fd_add_err = 0;
                pf->fd_atr_cnt = 0;
@@@ -9151,10 -11239,16 +11239,16 @@@ static int i40e_set_features(struct net
        else
                i40e_vlan_stripping_disable(vsi);
  
+       if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
+               dev_err(&pf->pdev->dev,
+                       "Offloaded tc filters active, can't turn hw_tc_offload off");
+               return -EINVAL;
+       }
        need_reset = i40e_set_ntuple(pf, features);
  
        if (need_reset)
-               i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+               i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
  
        return 0;
  }
@@@ -9406,8 -11500,7 +11500,7 @@@ static int i40e_ndo_bridge_setlink(stru
                                pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
                        else
                                pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
-                       i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED),
-                                     true);
+                       i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
                        break;
                }
        }
@@@ -9555,12 -11648,12 +11648,12 @@@ static int i40e_xdp_setup(struct i40e_v
  }
  
  /**
-  * i40e_xdp - implements ndo_xdp for i40e
+  * i40e_xdp - implements ndo_bpf for i40e
   * @dev: netdevice
   * @xdp: XDP command
   **/
  static int i40e_xdp(struct net_device *dev,
-                   struct netdev_xdp *xdp)
+                   struct netdev_bpf *xdp)
  {
        struct i40e_netdev_priv *np = netdev_priv(dev);
        struct i40e_vsi *vsi = np->vsi;
@@@ -9612,7 -11705,7 +11705,7 @@@ static const struct net_device_ops i40e
        .ndo_features_check     = i40e_features_check,
        .ndo_bridge_getlink     = i40e_ndo_bridge_getlink,
        .ndo_bridge_setlink     = i40e_ndo_bridge_setlink,
-       .ndo_xdp                = i40e_xdp,
+       .ndo_bpf                = i40e_xdp,
  };
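
The ndo_xdp to ndo_bpf rename tracks the generalization of the per-device XDP hook into a broader BPF command interface (struct netdev_bpf). Only the rename is visible in this hunk; the sketch below is a hypothetical illustration of how such a handler dispatches on xdp->command, with example_xdp_setup and example_xdp_query standing in for driver-specific helpers:

static int example_bpf(struct net_device *dev, struct netdev_bpf *xdp)
{
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		/* attach, replace or remove the XDP program in xdp->prog */
		return example_xdp_setup(dev, xdp->prog);
	case XDP_QUERY_PROG:
		/* fill in the query fields describing the attached program */
		return example_xdp_query(dev, xdp);
	default:
		return -EINVAL;
	}
}
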
  
  /**
@@@ -9671,7 -11764,8 +11764,8 @@@ static int i40e_config_netdev(struct i4
        netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
  
        if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
-               netdev->hw_features |= NETIF_F_NTUPLE;
+               netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
        hw_features = hw_enc_features           |
                      NETIF_F_HW_VLAN_CTAG_TX   |
                      NETIF_F_HW_VLAN_CTAG_RX;
@@@ -9849,6 -11943,31 +11943,31 @@@ static int i40e_add_vsi(struct i40e_vs
  
                enabled_tc = i40e_pf_get_tc_map(pf);
  
+               /* Source pruning is enabled by default, so the flag is
+                * negative logic - if it's set, we need to fiddle with
+                * the VSI to disable source pruning.
+                */
+               if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
+                       memset(&ctxt, 0, sizeof(ctxt));
+                       ctxt.seid = pf->main_vsi_seid;
+                       ctxt.pf_num = pf->hw.pf_id;
+                       ctxt.vf_num = 0;
+                       ctxt.info.valid_sections |=
+                                    cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+                       ctxt.info.switch_id =
+                                  cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
+                       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "update vsi failed, err %s aq_err %s\n",
+                                        i40e_stat_str(&pf->hw, ret),
+                                        i40e_aq_str(&pf->hw,
+                                                    pf->hw.aq.asq_last_status));
+                               ret = -ENOENT;
+                               goto err;
+                       }
+               }
                /* MFP mode setup queue map and update VSI */
                if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
                    !(pf->hw.func_caps.iscsi)) { /* NIC type PF */
@@@ -10951,14 -13070,16 +13070,16 @@@ static int i40e_setup_pf_switch(struct 
        */
  
        if ((pf->hw.pf_id == 0) &&
-           !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
+           !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
                flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+               pf->last_sw_conf_flags = flags;
+       }
  
        if (pf->hw.pf_id == 0) {
                u16 valid_flags;
  
                valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
-               ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags,
+               ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
                                                NULL);
                if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
                        dev_info(&pf->pdev->dev,
                                             pf->hw.aq.asq_last_status));
                        /* not a fatal problem, just keep going */
                }
+               pf->last_sw_conf_valid_flags = valid_flags;
        }
  
        /* first time setup */
                        vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
                if (!vsi) {
                        dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
+                       i40e_cloud_filter_exit(pf);
                        i40e_fdir_teardown(pf);
                        return -EAGAIN;
                }
  static void i40e_determine_queue_usage(struct i40e_pf *pf)
  {
        int queues_left;
+       int q_max;
  
        pf->num_lan_qps = 0;
  
                               I40E_FLAG_DCB_ENABLED    |
                               I40E_FLAG_SRIOV_ENABLED  |
                               I40E_FLAG_VMDQ_ENABLED);
+               pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
        } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
                                  I40E_FLAG_FD_SB_ENABLED |
                                  I40E_FLAG_FD_ATR_ENABLED |
                               I40E_FLAG_FD_ATR_ENABLED |
                               I40E_FLAG_DCB_ENABLED    |
                               I40E_FLAG_VMDQ_ENABLED);
+               pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
        } else {
                /* Not enough queues for all TCs */
                if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
                                        I40E_FLAG_DCB_ENABLED);
                        dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
                }
-               pf->num_lan_qps = max_t(int, pf->rss_size_max,
-                                       num_online_cpus());
-               pf->num_lan_qps = min_t(int, pf->num_lan_qps,
-                                       pf->hw.func_caps.num_tx_qp);
+               /* limit lan qps to the smaller of qps, cpus or msix */
+               q_max = max_t(int, pf->rss_size_max, num_online_cpus());
+               q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
+               q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
+               pf->num_lan_qps = q_max;
  
                queues_left -= pf->num_lan_qps;
        }
                        queues_left -= 1; /* save 1 queue for FD */
                } else {
                        pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+                       pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
                        dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
                }
        }
@@@ -11304,6 -13433,13 +13433,13 @@@ static int i40e_probe(struct pci_dev *p
        hw->bus.bus_id = pdev->bus->number;
        pf->instance = pfs_found;
  
+       /* Select something other than the 802.1ad ethertype for the
+        * switch to use internally and drop on ingress.
+        */
+       hw->switch_tag = 0xffff;
+       hw->first_tag = ETH_P_8021AD;
+       hw->second_tag = ETH_P_8021Q;
        INIT_LIST_HEAD(&pf->l3_flex_pit_list);
        INIT_LIST_HEAD(&pf->l4_flex_pit_list);
  
                 i40e_nvm_version_str(hw));
  
        if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
-           hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR)
+           hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
                dev_info(&pdev->dev,
                         "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
-       else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR ||
-                hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1))
+       else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
                dev_info(&pdev->dev,
                         "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
  
                dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
  
        i40e_clear_pxe_mode(hw);
-       err = i40e_get_capabilities(pf);
+       err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
        if (err)
                goto err_adminq_setup;
  
  #endif /* CONFIG_I40E_DCB */
  
        /* set up periodic task facility */
-       setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf);
+       timer_setup(&pf->service_timer, i40e_service_timer, 0);
        pf->service_timer_period = HZ;
  
        INIT_WORK(&pf->service_task, i40e_service_task);
                dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
                goto err_vsis;
        }
+       INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
  
        /* Make sure flow control is set according to current settings */
        err = i40e_set_fc(hw, &set_fc_aq_fail, true);
@@@ -11777,7 -13913,7 +13913,7 @@@ static void i40e_remove(struct pci_dev 
        /* no more scheduling of any task */
        set_bit(__I40E_SUSPENDED, pf->state);
        set_bit(__I40E_DOWN, pf->state);
-       if (pf->service_timer.data)
+       if (pf->service_timer.function)
                del_timer_sync(&pf->service_timer);
        if (pf->service_task.func)
                cancel_work_sync(&pf->service_task);
        if (pf->vsi[pf->lan_vsi])
                i40e_vsi_release(pf->vsi[pf->lan_vsi]);
  
+       i40e_cloud_filter_exit(pf);
        /* remove attached clients */
        if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
                ret_code = i40e_lan_del_device(pf);
@@@ -11936,6 -14074,28 +14074,28 @@@ static pci_ers_result_t i40e_pci_error_
        return result;
  }
  
+ /**
+  * i40e_pci_error_reset_prepare - prepare device driver for pci reset
+  * @pdev: PCI device information struct
+  */
+ static void i40e_pci_error_reset_prepare(struct pci_dev *pdev)
+ {
+       struct i40e_pf *pf = pci_get_drvdata(pdev);
+       i40e_prep_for_reset(pf, false);
+ }
+ /**
+  * i40e_pci_error_reset_done - pci reset done, device driver reset can begin
+  * @pdev: PCI device information struct
+  */
+ static void i40e_pci_error_reset_done(struct pci_dev *pdev)
+ {
+       struct i40e_pf *pf = pci_get_drvdata(pdev);
+       i40e_reset_and_rebuild(pf, false, false);
+ }
  /**
   * i40e_pci_error_resume - restart operations after PCI error recovery
   * @pdev: PCI device information struct
@@@ -12021,6 -14181,7 +14181,7 @@@ static void i40e_shutdown(struct pci_de
  
        del_timer_sync(&pf->service_timer);
        cancel_work_sync(&pf->service_task);
+       i40e_cloud_filter_exit(pf);
        i40e_fdir_teardown(pf);
  
        /* Client close must be called explicitly here because the timer
        }
  }
  
- #ifdef CONFIG_PM
  /**
-  * i40e_suspend - PCI callback for moving to D3
-  * @pdev: PCI device information struct
+  * i40e_suspend - PM callback for moving to D3
+  * @dev: generic device information structure
   **/
- static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
+ static int __maybe_unused i40e_suspend(struct device *dev)
  {
+       struct pci_dev *pdev = to_pci_dev(dev);
        struct i40e_pf *pf = pci_get_drvdata(pdev);
        struct i40e_hw *hw = &pf->hw;
-       int retval = 0;
  
-       set_bit(__I40E_SUSPENDED, pf->state);
+       /* If we're already suspended, then there is nothing to do */
+       if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
+               return 0;
        set_bit(__I40E_DOWN, pf->state);
  
+       /* Ensure service task will not be running */
+       del_timer_sync(&pf->service_timer);
+       cancel_work_sync(&pf->service_task);
        if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
                i40e_enable_mc_magic_wake(pf);
  
        wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
        wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
  
-       i40e_stop_misc_vector(pf);
-       if (pf->msix_entries) {
-               synchronize_irq(pf->msix_entries[0].vector);
-               free_irq(pf->msix_entries[0].vector, pf);
-       }
-       retval = pci_save_state(pdev);
-       if (retval)
-               return retval;
-       pci_wake_from_d3(pdev, pf->wol_en);
-       pci_set_power_state(pdev, PCI_D3hot);
+       /* Clear the interrupt scheme and release our IRQs so that the system
+        * can safely hibernate even when there are a large number of CPUs.
+        * Otherwise hibernation might fail when mapping all the vectors back
+        * to CPU0.
+        */
+       i40e_clear_interrupt_scheme(pf);
  
-       return retval;
+       return 0;
  }
  
  /**
-  * i40e_resume - PCI callback for waking up from D3
-  * @pdev: PCI device information struct
+  * i40e_resume - PM callback for waking up from D3
+  * @dev: generic device information structure
   **/
- static int i40e_resume(struct pci_dev *pdev)
+ static int __maybe_unused i40e_resume(struct device *dev)
  {
+       struct pci_dev *pdev = to_pci_dev(dev);
        struct i40e_pf *pf = pci_get_drvdata(pdev);
-       u32 err;
+       int err;
  
-       pci_set_power_state(pdev, PCI_D0);
-       pci_restore_state(pdev);
-       /* pci_restore_state() clears dev->state_saves, so
-        * call pci_save_state() again to restore it.
-        */
-       pci_save_state(pdev);
+       /* If we're not suspended, then there is nothing to do */
+       if (!test_bit(__I40E_SUSPENDED, pf->state))
+               return 0;
  
-       err = pci_enable_device_mem(pdev);
+       /* We cleared the interrupt scheme when we suspended, so we need to
+        * restore it now to resume device functionality.
+        */
+       err = i40e_restore_interrupt_scheme(pf);
        if (err) {
-               dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
-               return err;
+               dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n",
+                       err);
        }
-       pci_set_master(pdev);
  
-       /* no wakeup events while running */
-       pci_wake_from_d3(pdev, false);
-       /* handling the reset will rebuild the device state */
-       if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
-               clear_bit(__I40E_DOWN, pf->state);
-               if (pf->msix_entries) {
-                       err = request_irq(pf->msix_entries[0].vector,
-                                         i40e_intr, 0, pf->int_name, pf);
-                       if (err) {
-                               dev_err(&pf->pdev->dev,
-                                       "request_irq for %s failed: %d\n",
-                                       pf->int_name, err);
-                       }
-               }
-               i40e_reset_and_rebuild(pf, false, false);
-       }
+       clear_bit(__I40E_DOWN, pf->state);
+       i40e_reset_and_rebuild(pf, false, false);
+       /* Clear suspended state last after everything is recovered */
+       clear_bit(__I40E_SUSPENDED, pf->state);
+       /* Restart the service task */
+       mod_timer(&pf->service_timer,
+                 round_jiffies(jiffies + pf->service_timer_period));
  
        return 0;
  }
  
- #endif
  static const struct pci_error_handlers i40e_err_handler = {
        .error_detected = i40e_pci_error_detected,
        .slot_reset = i40e_pci_error_slot_reset,
+       .reset_prepare = i40e_pci_error_reset_prepare,
+       .reset_done = i40e_pci_error_reset_done,
        .resume = i40e_pci_error_resume,
  };
  
+ static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume);
  static struct pci_driver i40e_driver = {
        .name     = i40e_driver_name,
        .id_table = i40e_pci_tbl,
        .probe    = i40e_probe,
        .remove   = i40e_remove,
- #ifdef CONFIG_PM
-       .suspend  = i40e_suspend,
-       .resume   = i40e_resume,
- #endif
+       .driver   = {
+               .pm = &i40e_pm_ops,
+       },
        .shutdown = i40e_shutdown,
        .err_handler = &i40e_err_handler,
        .sriov_configure = i40e_pci_sriov_configure,
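
The suspend/resume rework replaces the legacy pci_driver .suspend/.resume hooks with generic dev_pm_ops wired up through SIMPLE_DEV_PM_OPS, letting the PCI core handle config-space save/restore and D-state transitions. A minimal sketch of that wiring, with hypothetical names:

static int __maybe_unused example_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	/* quiesce the device; the PCI core saves config space and
	 * selects the low-power state on our behalf
	 */
	dev_dbg(&pdev->dev, "suspending\n");
	return 0;
}

static int __maybe_unused example_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	dev_dbg(&pdev->dev, "resuming\n");
	return 0;
}

static SIMPLE_DEV_PM_OPS(example_pm_ops, example_suspend, example_resume);

static struct pci_driver example_driver = {
	.name   = "example",
	.driver = {
		.pm = &example_pm_ops,
	},
};
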
index 31a3f09df9f75fee5ab62472c64fb07446408f61,8eee081d395f97a77363a9b1965cea223941d1d5..568c96842f2818b1e72de9fd0a19a2625070e363
@@@ -375,7 -375,7 +375,7 @@@ u32 igb_rd32(struct e1000_hw *hw, u32 r
  /* write operations, indexed using DWORDS */
  #define wr32(reg, val) \
  do { \
 -      u8 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \
 +      u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
        if (!E1000_REMOVED(hw_addr)) \
                writel((val), &hw_addr[(reg)]); \
  } while (0)
  
  #define E1000_I210_FLA                0x1201C
  
+ #define E1000_I210_DTXMXPKTSZ 0x355C
+ #define E1000_I210_TXDCTL(_n) (0x0E028 + ((_n) * 0x40))
+ #define E1000_I210_TQAVCTRL   0x3570
+ #define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40))
+ #define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40))
  #define E1000_INVM_DATA_REG(_n)       (0x12120 + 4*(_n))
  #define E1000_INVM_SIZE               64 /* Number of INVM Data Registers */
  
index 18b6c25d4705b9ca12918c5369a7a9e02cf35634,43cf39527660b6c6e07f200131c60066b83df932..e94d3c256667637c8186fd83299b64ea2de53c72
@@@ -34,6 -34,7 +34,7 @@@
  #include <linux/slab.h>
  #include <net/checksum.h>
  #include <net/ip6_checksum.h>
+ #include <net/pkt_sched.h>
  #include <linux/net_tstamp.h>
  #include <linux/mii.h>
  #include <linux/ethtool.h>
  #define BUILD 0
  #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
  __stringify(BUILD) "-k"
+ enum queue_mode {
+       QUEUE_MODE_STRICT_PRIORITY,
+       QUEUE_MODE_STREAM_RESERVATION,
+ };
+ enum tx_queue_prio {
+       TX_QUEUE_PRIO_HIGH,
+       TX_QUEUE_PRIO_LOW,
+ };
  char igb_driver_name[] = "igb";
  char igb_driver_version[] = DRV_VERSION;
  static const char igb_driver_string[] =
@@@ -133,8 -145,8 +145,8 @@@ static void igb_clean_all_rx_rings(stru
  static void igb_clean_tx_ring(struct igb_ring *);
  static void igb_clean_rx_ring(struct igb_ring *);
  static void igb_set_rx_mode(struct net_device *);
- static void igb_update_phy_info(unsigned long);
- static void igb_watchdog(unsigned long);
+ static void igb_update_phy_info(struct timer_list *);
+ static void igb_watchdog(struct timer_list *);
  static void igb_watchdog_task(struct work_struct *);
  static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
  static void igb_get_stats64(struct net_device *dev,
@@@ -750,7 -762,7 +762,7 @@@ static void igb_cache_ring_register(str
  u32 igb_rd32(struct e1000_hw *hw, u32 reg)
  {
        struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw);
 -      u8 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr);
 +      u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
        u32 value = 0;
  
        if (E1000_REMOVED(hw_addr))
@@@ -1271,6 -1283,12 +1283,12 @@@ static int igb_alloc_q_vector(struct ig
                ring->count = adapter->tx_ring_count;
                ring->queue_index = txr_idx;
  
+               ring->cbs_enable = false;
+               ring->idleslope = 0;
+               ring->sendslope = 0;
+               ring->hicredit = 0;
+               ring->locredit = 0;
                u64_stats_init(&ring->tx_syncp);
                u64_stats_init(&ring->tx_syncp2);
  
@@@ -1598,6 -1616,284 +1616,284 @@@ static void igb_get_hw_control(struct i
                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
  }
  
+ static void enable_fqtss(struct igb_adapter *adapter, bool enable)
+ {
+       struct net_device *netdev = adapter->netdev;
+       struct e1000_hw *hw = &adapter->hw;
+       WARN_ON(hw->mac.type != e1000_i210);
+       if (enable)
+               adapter->flags |= IGB_FLAG_FQTSS;
+       else
+               adapter->flags &= ~IGB_FLAG_FQTSS;
+       if (netif_running(netdev))
+               schedule_work(&adapter->reset_task);
+ }
+ static bool is_fqtss_enabled(struct igb_adapter *adapter)
+ {
+       return (adapter->flags & IGB_FLAG_FQTSS) ? true : false;
+ }
+ static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue,
+                                  enum tx_queue_prio prio)
+ {
+       u32 val;
+       WARN_ON(hw->mac.type != e1000_i210);
+       WARN_ON(queue < 0 || queue > 4);
+       val = rd32(E1000_I210_TXDCTL(queue));
+       if (prio == TX_QUEUE_PRIO_HIGH)
+               val |= E1000_TXDCTL_PRIORITY;
+       else
+               val &= ~E1000_TXDCTL_PRIORITY;
+       wr32(E1000_I210_TXDCTL(queue), val);
+ }
+ static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode)
+ {
+       u32 val;
+       WARN_ON(hw->mac.type != e1000_i210);
+       WARN_ON(queue < 0 || queue > 1);
+       val = rd32(E1000_I210_TQAVCC(queue));
+       if (mode == QUEUE_MODE_STREAM_RESERVATION)
+               val |= E1000_TQAVCC_QUEUEMODE;
+       else
+               val &= ~E1000_TQAVCC_QUEUEMODE;
+       wr32(E1000_I210_TQAVCC(queue), val);
+ }
+ /**
+  *  igb_configure_cbs - Configure Credit-Based Shaper (CBS)
+  *  @adapter: pointer to adapter struct
+  *  @queue: queue number
+  *  @enable: true = enable CBS, false = disable CBS
+  *  @idleslope: idleSlope in kbps
+  *  @sendslope: sendSlope in kbps
+  *  @hicredit: hiCredit in bytes
+  *  @locredit: loCredit in bytes
+  *
+  *  Configure CBS for a given hardware queue. When disabling, the idleslope,
+  *  sendslope, hicredit and locredit arguments are ignored.
+  **/
+ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
+                             bool enable, int idleslope, int sendslope,
+                             int hicredit, int locredit)
+ {
+       struct net_device *netdev = adapter->netdev;
+       struct e1000_hw *hw = &adapter->hw;
+       u32 tqavcc;
+       u16 value;
+       WARN_ON(hw->mac.type != e1000_i210);
+       WARN_ON(queue < 0 || queue > 1);
+       if (enable) {
+               set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
+               set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
+               /* According to i210 datasheet section 7.2.7.7, we should set
+                * the 'idleSlope' field of the TQAVCC register following the
+                * equation:
+                *
+                * For 100 Mbps link speed:
+                *
+                *     value = BW * 0x7735 * 0.2                          (E1)
+                *
+                * For 1000Mbps link speed:
+                *
+                *     value = BW * 0x7735 * 2                            (E2)
+                *
+                * E1 and E2 can be merged into one equation as shown below.
+                * Note that 'link-speed' is in Mbps.
+                *
+                *     value = BW * 0x7735 * 2 * link-speed
+                *                           --------------               (E3)
+                *                                1000
+                *
+                * 'BW' is the percentage bandwidth out of full link speed
+                * which can be found with the following equation. Note that
+                * idleSlope here is the parameter from this function which
+                * is in kbps.
+                *
+                *     BW =     idleSlope
+                *          -----------------                             (E4)
+                *          link-speed * 1000
+                *
+                * That said, we can come up with a generic equation to
+                * calculate the value we should set in the TQAVCC register by
+                * replacing 'BW' in E3 by E4. The resulting equation is:
+                *
+                * value =     idleSlope     * 0x7735 * 2 * link-speed
+                *         -----------------            --------------    (E5)
+                *         link-speed * 1000                 1000
+                *
+                * 'link-speed' is present in both sides of the fraction so
+                * it is canceled out. The final equation is the following:
+                *
+                *     value = idleSlope * 61034
+                *             -----------------                          (E6)
+                *                  1000000
+                */
+               value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
+               tqavcc = rd32(E1000_I210_TQAVCC(queue));
+               tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+               tqavcc |= value;
+               wr32(E1000_I210_TQAVCC(queue), tqavcc);
+               wr32(E1000_I210_TQAVHC(queue), 0x80000000 + hicredit * 0x7735);
+       } else {
+               set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW);
+               set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY);
+               /* Set idleSlope to zero. */
+               tqavcc = rd32(E1000_I210_TQAVCC(queue));
+               tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+               wr32(E1000_I210_TQAVCC(queue), tqavcc);
+               /* Set hiCredit to zero. */
+               wr32(E1000_I210_TQAVHC(queue), 0);
+       }
+       /* XXX: In the i210 controller the sendSlope and loCredit parameters from
+        * CBS are not configurable by software, so we don't do any 'controller
+        * configuration' with respect to these parameters.
+        */
+       netdev_dbg(netdev, "CBS %s: queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+                  (enable) ? "enabled" : "disabled", queue,
+                  idleslope, sendslope, hicredit, locredit);
+ }
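
To make E6 concrete: with an assumed reservation of idleslope = 20000 kbps (20 Mbps, an illustrative figure not taken from the patch), value = DIV_ROUND_UP(20000 * 61034, 1000000) = 1221, which agrees with E5 on a 1 Gbps link since 0.02 * 0x7735 * 2 is roughly 1220.7. The same arithmetic as a standalone user-space check:

#include <stdio.h>

int main(void)
{
	unsigned long long idleslope_kbps = 20000;	/* assumed example reservation */
	/* open-coded DIV_ROUND_UP(idleslope * 61034, 1000000) from E6 */
	unsigned long long value =
		(idleslope_kbps * 61034ULL + 999999ULL) / 1000000ULL;

	printf("TQAVCC idleSlope field = %llu\n", value);	/* prints 1221 */
	return 0;
}
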
+ static int igb_save_cbs_params(struct igb_adapter *adapter, int queue,
+                              bool enable, int idleslope, int sendslope,
+                              int hicredit, int locredit)
+ {
+       struct igb_ring *ring;
+       if (queue < 0 || queue > adapter->num_tx_queues)
+               return -EINVAL;
+       ring = adapter->tx_ring[queue];
+       ring->cbs_enable = enable;
+       ring->idleslope = idleslope;
+       ring->sendslope = sendslope;
+       ring->hicredit = hicredit;
+       ring->locredit = locredit;
+       return 0;
+ }
+ static bool is_any_cbs_enabled(struct igb_adapter *adapter)
+ {
+       struct igb_ring *ring;
+       int i;
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               ring = adapter->tx_ring[i];
+               if (ring->cbs_enable)
+                       return true;
+       }
+       return false;
+ }
+ static void igb_setup_tx_mode(struct igb_adapter *adapter)
+ {
+       struct net_device *netdev = adapter->netdev;
+       struct e1000_hw *hw = &adapter->hw;
+       u32 val;
+       /* Only i210 controller supports changing the transmission mode. */
+       if (hw->mac.type != e1000_i210)
+               return;
+       if (is_fqtss_enabled(adapter)) {
+               int i, max_queue;
+               /* Configure TQAVCTRL register: set transmit mode to 'Qav',
+                * set data fetch arbitration to 'round robin' and set data
+                * transfer arbitration to 'credit shaper algorithm'.
+                */
+               val = rd32(E1000_I210_TQAVCTRL);
+               val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_DATATRANARB;
+               val &= ~E1000_TQAVCTRL_DATAFETCHARB;
+               wr32(E1000_I210_TQAVCTRL, val);
+               /* Configure Tx and Rx packet buffers sizes as described in
+                * i210 datasheet section 7.2.7.7.
+                */
+               val = rd32(E1000_TXPBS);
+               val &= ~I210_TXPBSIZE_MASK;
+               val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB |
+                       I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB;
+               wr32(E1000_TXPBS, val);
+               val = rd32(E1000_RXPBS);
+               val &= ~I210_RXPBSIZE_MASK;
+               val |= I210_RXPBSIZE_PB_32KB;
+               wr32(E1000_RXPBS, val);
+               /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ
+                * register should not exceed the buffer size programmed in
+                * TXPBS. The smallest buffer size programmed in TXPBS is 4kB
+                * so according to the datasheet we should set MAX_TPKT_SIZE to
+                * 4kB / 64.
+                *
+                * However, when we do so, no frames from queues 2 and 3 are
+                * transmitted.  It seems the MAX_TPKT_SIZE should not be greater
+                * than or _equal_ to the buffer size programmed in TXPBS. For this
+                * reason, we set MAX_TPKT_SIZE to (4kB - 1) / 64.
+                */
+               val = (4096 - 1) / 64;
+               wr32(E1000_I210_DTXMXPKTSZ, val);
+               /* Since FQTSS mode is enabled, apply any CBS configuration
+                * previously set. If no previous CBS configuration has been
+                * done, then the initial configuration is applied, which means
+                * CBS is disabled.
+                */
+               max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ?
+                           adapter->num_tx_queues : I210_SR_QUEUES_NUM;
+               for (i = 0; i < max_queue; i++) {
+                       struct igb_ring *ring = adapter->tx_ring[i];
+                       igb_configure_cbs(adapter, i, ring->cbs_enable,
+                                         ring->idleslope, ring->sendslope,
+                                         ring->hicredit, ring->locredit);
+               }
+       } else {
+               wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
+               wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
+               wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT);
+               val = rd32(E1000_I210_TQAVCTRL);
+               /* According to Section 8.12.21, the other flags we've set when
+                * enabling FQTSS are not relevant when disabling FQTSS so we
+                * don't set them here.
+                */
+               val &= ~E1000_TQAVCTRL_XMIT_MODE;
+               wr32(E1000_I210_TQAVCTRL, val);
+       }
+       netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ?
+                  "enabled" : "disabled");
+ }
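
For reference, the DTXMXPKTSZ value written above works out, in the 64-byte units the datasheet uses, to:

     MAX_TPKT_SIZE = (4096 - 1) / 64 = 63,  i.e. 63 * 64 = 4032 bytes

so the maximum transmit packet size in Qav mode stays just below the smallest (4 KB) packet buffer programmed into TXPBS, matching the workaround described in the comment.
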
  /**
   *  igb_configure - configure the hardware for RX and TX
   *  @adapter: private board structure
@@@ -1609,6 -1905,7 +1905,7 @@@ static void igb_configure(struct igb_ad
  
        igb_get_hw_control(adapter);
        igb_set_rx_mode(netdev);
+       igb_setup_tx_mode(adapter);
  
        igb_restore_vlan(adapter);
  
@@@ -2150,6 -2447,55 +2447,55 @@@ igb_features_check(struct sk_buff *skb
        return features;
  }
  
+ static int igb_offload_cbs(struct igb_adapter *adapter,
+                          struct tc_cbs_qopt_offload *qopt)
+ {
+       struct e1000_hw *hw = &adapter->hw;
+       int err;
+       /* CBS offloading is only supported by i210 controller. */
+       if (hw->mac.type != e1000_i210)
+               return -EOPNOTSUPP;
+       /* CBS offloading is only supported by queue 0 and queue 1. */
+       if (qopt->queue < 0 || qopt->queue > 1)
+               return -EINVAL;
+       err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable,
+                                 qopt->idleslope, qopt->sendslope,
+                                 qopt->hicredit, qopt->locredit);
+       if (err)
+               return err;
+       if (is_fqtss_enabled(adapter)) {
+               igb_configure_cbs(adapter, qopt->queue, qopt->enable,
+                                 qopt->idleslope, qopt->sendslope,
+                                 qopt->hicredit, qopt->locredit);
+               if (!is_any_cbs_enabled(adapter))
+                       enable_fqtss(adapter, false);
+       } else {
+               enable_fqtss(adapter, true);
+       }
+       return 0;
+ }
+ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+                       void *type_data)
+ {
+       struct igb_adapter *adapter = netdev_priv(dev);
+       switch (type) {
+       case TC_SETUP_QDISC_CBS:
+               return igb_offload_cbs(adapter, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+ }
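
From user space, this TC_SETUP_QDISC_CBS hook is reached through the new CBS qdisc added elsewhere in this merge. A hedged example of the intended flow (device name, parent handle and slope/credit numbers are purely illustrative and assume an mqprio root qdisc is already in place):

     tc qdisc replace dev eth0 parent 100:1 cbs \
         idleslope 98688 sendslope -901312 hicredit 153 locredit -1389 \
         offload 1

where 'offload 1' is what makes the qdisc call .ndo_setup_tc() with TC_SETUP_QDISC_CBS instead of doing the shaping in software.
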
  static const struct net_device_ops igb_netdev_ops = {
        .ndo_open               = igb_open,
        .ndo_stop               = igb_close,
        .ndo_set_features       = igb_set_features,
        .ndo_fdb_add            = igb_ndo_fdb_add,
        .ndo_features_check     = igb_features_check,
+       .ndo_setup_tc           = igb_setup_tc,
  };
  
  /**
@@@ -2538,10 -2885,8 +2885,8 @@@ static int igb_probe(struct pci_dev *pd
                wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
        }
  
-       setup_timer(&adapter->watchdog_timer, igb_watchdog,
-                   (unsigned long) adapter);
-       setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
-                   (unsigned long) adapter);
+       timer_setup(&adapter->watchdog_timer, igb_watchdog, 0);
+       timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0);
  
        INIT_WORK(&adapter->reset_task, igb_reset_task);
        INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
@@@ -3162,6 -3507,8 +3507,8 @@@ static int igb_sw_init(struct igb_adapt
        /* Setup and initialize a copy of the hw vlan table array */
        adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
                                       GFP_ATOMIC);
+       if (!adapter->shadow_vfta)
+               return -ENOMEM;
  
        /* This call may decrease the number of queues */
        if (igb_init_interrupt_scheme(adapter, true)) {
@@@ -4423,9 -4770,9 +4770,9 @@@ static void igb_spoof_check(struct igb_
  /* Need to wait a few seconds after link up to get diagnostic information from
   * the phy
   */
- static void igb_update_phy_info(unsigned long data)
+ static void igb_update_phy_info(struct timer_list *t)
  {
-       struct igb_adapter *adapter = (struct igb_adapter *) data;
+       struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
        igb_get_phy_info(&adapter->hw);
  }
  
@@@ -4512,9 -4859,9 +4859,9 @@@ static void igb_check_lvmmc(struct igb_
   *  igb_watchdog - Timer Call-back
   *  @t: pointer to the timer_list from which the adapter is looked up
   **/
- static void igb_watchdog(unsigned long data)
+ static void igb_watchdog(struct timer_list *t)
  {
-       struct igb_adapter *adapter = (struct igb_adapter *)data;
+       struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
        /* Do the rest outside of interrupt context */
        schedule_work(&adapter->watchdog_task);
  }
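
The setup_timer() -> timer_setup() and from_timer() conversions above repeat in most of the drivers that follow (ixgbe, ixgbevf, vxge, sfc, niu, yam). As a rough self-contained sketch of the pattern, with a hypothetical 'foo' driver that is not part of this commit:

     #include <linux/timer.h>
     #include <linux/jiffies.h>

     struct foo_adapter {
             struct timer_list watchdog_timer;
             /* ... other driver state ... */
     };

     /* the callback now receives the timer itself ... */
     static void foo_watchdog(struct timer_list *t)
     {
             /* ... and recovers the enclosing structure via from_timer() */
             struct foo_adapter *adapter = from_timer(adapter, t, watchdog_timer);

             /* periodic work goes here, then rearm */
             mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ);
     }

     static void foo_init_timers(struct foo_adapter *adapter)
     {
             /* no more casting the adapter to an unsigned long cookie */
             timer_setup(&adapter->watchdog_timer, foo_watchdog, 0);
             mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ);
     }
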
index 935a2f15b0b00e72763214aad0248a504f5b3a45,6eaca8366ac88c4cb0d271413d2f33d6bf497bde..ca06c3cc2ca841fc395c957efe64cf717b36670f
@@@ -380,7 -380,7 +380,7 @@@ static void ixgbe_check_remove(struct i
   */
  u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
  {
 -      u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr);
 +      u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
        u32 value;
  
        if (ixgbe_removed(reg_addr))
@@@ -1620,6 -1620,7 +1620,7 @@@ static bool ixgbe_alloc_mapped_page(str
        bi->page = page;
        bi->page_offset = ixgbe_rx_offset(rx_ring);
        bi->pagecnt_bias = 1;
+       rx_ring->rx_stats.alloc_rx_page++;
  
        return true;
  }
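
The ACCESS_ONCE() -> READ_ONCE() change above is repeated mechanically throughout the sfc, mlx4 and niu hunks below (with ACCESS_ONCE() stores becoming WRITE_ONCE()). A hedged sketch of what the pair is used for, with a made-up 'foo_ring' structure that is not from this commit:

     #include <linux/compiler.h>

     struct foo_ring {
             unsigned int cons;      /* written by the completion path,  */
                                     /* read locklessly by the xmit path */
     };

     static unsigned int foo_peek_cons(struct foo_ring *ring)
     {
             /* exactly one load, never torn or refetched by the compiler */
             return READ_ONCE(ring->cons);
     }

     static void foo_publish_cons(struct foo_ring *ring, unsigned int val)
     {
             /* exactly one store; memory ordering still needs barriers
              * (smp_mb(), dma_wmb(), ...) where the algorithm requires them
              */
             WRITE_ONCE(ring->cons, val);
     }
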
@@@ -2133,6 -2134,21 +2134,21 @@@ static struct sk_buff *ixgbe_construct_
  #if L1_CACHE_BYTES < 128
        prefetch(xdp->data + L1_CACHE_BYTES);
  #endif
+       /* Note, we get here by enabling legacy-rx via:
+        *
+        *    ethtool --set-priv-flags <dev> legacy-rx on
+        *
+        * In this mode, we currently get 0 extra XDP headroom as
+        * opposed to having legacy-rx off, where we process XDP
+        * packets going to stack via ixgbe_build_skb(). The latter
+        * provides us currently with 192 bytes of headroom.
+        *
+        * For ixgbe_construct_skb() mode it means that the
+        * xdp->data_meta will always point to xdp->data, since
+        * the helper cannot expand the head. Should this ever
+        * change in the future for legacy-rx mode on, then let's also
+        * add xdp->data_meta handling here.
+        */
  
        /* allocate a skb to store the frags */
        skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE);
@@@ -2165,6 -2181,7 +2181,7 @@@ static struct sk_buff *ixgbe_build_skb(
                                       struct xdp_buff *xdp,
                                       union ixgbe_adv_rx_desc *rx_desc)
  {
+       unsigned int metasize = xdp->data - xdp->data_meta;
  #if (PAGE_SIZE < 8192)
        unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
  #else
  #endif
        struct sk_buff *skb;
  
-       /* prefetch first cache line of first page */
-       prefetch(xdp->data);
+       /* Prefetch first cache line of first page. If xdp->data_meta
+        * is unused, this points exactly to xdp->data; otherwise we
+        * likely have a consumer accessing the first few bytes of
+        * metadata, and then the actual data.
+        */
+       prefetch(xdp->data_meta);
  #if L1_CACHE_BYTES < 128
-       prefetch(xdp->data + L1_CACHE_BYTES);
+       prefetch(xdp->data_meta + L1_CACHE_BYTES);
  #endif
  
        /* build an skb around the page buffer */
        /* update pointers within the skb to store the data */
        skb_reserve(skb, xdp->data - xdp->data_hard_start);
        __skb_put(skb, xdp->data_end - xdp->data);
+       if (metasize)
+               skb_metadata_set(skb, metasize);
  
        /* record DMA address if this is the start of a chain of buffers */
        if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
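
skb_metadata_set() above is the consumer side of the new xdp->data_meta area. A minimal XDP program sketch that would populate those bytes (illustrative only; the header path and section name follow the usual sample/selftest conventions and are not part of this commit):

     #include <linux/bpf.h>
     #include "bpf_helpers.h"        /* SEC() and helper declarations */

     SEC("xdp")
     int xdp_store_meta(struct xdp_md *ctx)
     {
             void *data;
             __u32 *meta;

             /* grow the metadata area by 4 bytes, just in front of the packet */
             if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
                     return XDP_PASS;

             data = (void *)(long)ctx->data;
             meta = (void *)(long)ctx->data_meta;
             if ((void *)(meta + 1) > data)  /* verifier bounds check */
                     return XDP_PASS;

             *meta = 0x42;   /* arbitrary value, later visible as skb metadata */
             return XDP_PASS;
     }

     char _license[] SEC("license") = "GPL";
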
@@@ -2326,6 -2349,7 +2349,7 @@@ static int ixgbe_clean_rx_irq(struct ix
                if (!skb) {
                        xdp.data = page_address(rx_buffer->page) +
                                   rx_buffer->page_offset;
+                       xdp.data_meta = xdp.data;
                        xdp.data_hard_start = xdp.data -
                                              ixgbe_rx_offset(rx_ring);
                        xdp.data_end = xdp.data + size;
@@@ -2516,50 -2540,174 +2540,174 @@@ enum latency_range 
  static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
                             struct ixgbe_ring_container *ring_container)
  {
-       int bytes = ring_container->total_bytes;
-       int packets = ring_container->total_packets;
-       u32 timepassed_us;
-       u64 bytes_perint;
-       u8 itr_setting = ring_container->itr;
+       unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
+                          IXGBE_ITR_ADAPTIVE_LATENCY;
+       unsigned int avg_wire_size, packets, bytes;
+       unsigned long next_update = jiffies;
  
-       if (packets == 0)
+       /* If we don't have any rings just leave ourselves set for maximum
+        * possible latency so we take ourselves out of the equation.
+        */
+       if (!ring_container->ring)
                return;
  
-       /* simple throttlerate management
-        *   0-10MB/s   lowest (100000 ints/s)
-        *  10-20MB/s   low    (20000 ints/s)
-        *  20-1249MB/s bulk   (12000 ints/s)
+       /* If we didn't update within up to 1 - 2 jiffies we can assume
+        * that either packets are coming in so slowly that there hasn't
+        * been any work, or that there is so much work that NAPI is dealing
+        * with interrupt moderation and we don't need to do anything.
         */
-       /* what was last interrupt timeslice? */
-       timepassed_us = q_vector->itr >> 2;
-       if (timepassed_us == 0)
-               return;
+       if (time_after(next_update, ring_container->next_update))
+               goto clear_counts;
  
-       bytes_perint = bytes / timepassed_us; /* bytes/usec */
+       packets = ring_container->total_packets;
  
-       switch (itr_setting) {
-       case lowest_latency:
-               if (bytes_perint > 10)
-                       itr_setting = low_latency;
-               break;
-       case low_latency:
-               if (bytes_perint > 20)
-                       itr_setting = bulk_latency;
-               else if (bytes_perint <= 10)
-                       itr_setting = lowest_latency;
+       /* We have no packets to actually measure against. This means
+        * either one of the other queues on this vector is active or
+        * we are a Tx queue doing TSO with too high of an interrupt rate.
+        *
+        * When this occurs just tick up our delay by the minimum value
+        * and hope that this extra delay will prevent us from being called
+        * without any work on our queue.
+        */
+       if (!packets) {
+               itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+               if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+                       itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+               itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
+               goto clear_counts;
+       }
+       bytes = ring_container->total_bytes;
+       /* If packets are fewer than 4 or bytes are less than 9000, assume
+        * we have insufficient data to use the bulk rate limiting approach;
+        * we are likely latency driven.
+        */
+       if (packets < 4 && bytes < 9000) {
+               itr = IXGBE_ITR_ADAPTIVE_LATENCY;
+               goto adjust_by_size;
+       }
+       /* Between 4 and 48 we can assume that our current interrupt delay
+        * is only slightly too low. As such we should increase it by a small
+        * fixed amount.
+        */
+       if (packets < 48) {
+               itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+               if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+                       itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+               goto clear_counts;
+       }
+       /* Between 48 and 96 is our "goldilocks" zone where we are working
+        * out "just right". Just report that our current ITR is good for us.
+        */
+       if (packets < 96) {
+               itr = q_vector->itr >> 2;
+               goto clear_counts;
+       }
+       /* If packet count is 96 or greater we are likely looking at a slight
+        * overrun of the delay we want. Try halving our delay to see if that
+        * will cut the number of packets in half per interrupt.
+        */
+       if (packets < 256) {
+               itr = q_vector->itr >> 3;
+               if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
+                       itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
+               goto clear_counts;
+       }
+       /* The paths below assume we are dealing with a bulk ITR since the number
+        * of packets is 256 or greater. We are just going to have to compute
+        * a value and try to bring the count under control, though for smaller
+        * packet sizes there isn't much we can do as NAPI polling will likely
+        * be kicking in sooner rather than later.
+        */
+       itr = IXGBE_ITR_ADAPTIVE_BULK;
+ adjust_by_size:
+       /* If packet counts are 256 or greater we can assume we have a gross
+        * overestimation of what the rate should be. Instead of trying to fine
+        * tune it, just use the formula below to try and dial in an exact
+        * value given the current packet size of the frame.
+        */
+       avg_wire_size = bytes / packets;
+       /* The following is a crude approximation of:
+        *  wmem_default / (size + overhead) = desired_pkts_per_int
+        *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+        *  (desired_pkts_per_int / pkt_rate) * usecs_per_sec = ITR value
+        *
+        * Assuming wmem_default is 212992 and overhead is 640 bytes per
+        * packet (256 skb, 64 headroom, 320 shared info), we can reduce the
+        * formula down to
+        *
+        *  (170 * (size + 24)) / (size + 640) = ITR
+        *
+        * We first do some math on the packet size and then finally bitshift
+        * by 8 after rounding up. We also have to account for PCIe link speed
+        * difference as ITR scales based on this.
+        */
+       if (avg_wire_size <= 60) {
+               /* Start at 50k ints/sec */
+               avg_wire_size = 5120;
+       } else if (avg_wire_size <= 316) {
+               /* 50K ints/sec to 16K ints/sec */
+               avg_wire_size *= 40;
+               avg_wire_size += 2720;
+       } else if (avg_wire_size <= 1084) {
+               /* 16K ints/sec to 9.2K ints/sec */
+               avg_wire_size *= 15;
+               avg_wire_size += 11452;
+       } else if (avg_wire_size <= 1980) {
+               /* 9.2K ints/sec to 8K ints/sec */
+               avg_wire_size *= 5;
+               avg_wire_size += 22420;
+       } else {
+               /* plateau at a limit of 8K ints/sec */
+               avg_wire_size = 32256;
+       }
+       /* If we are in low latency mode, halve our delay, which doubles the
+        * rate to somewhere between 100K and 16K ints/sec
+        */
+       if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
+               avg_wire_size >>= 1;
+       /* Resultant value is 256 times larger than it needs to be. This
+        * gives us room to adjust the value as needed to either increase
+        * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+        *
+        * Use addition as we have already recorded the new latency flag
+        * for the ITR value.
+        */
+       switch (q_vector->adapter->link_speed) {
+       case IXGBE_LINK_SPEED_10GB_FULL:
+       case IXGBE_LINK_SPEED_100_FULL:
+       default:
+               itr += DIV_ROUND_UP(avg_wire_size,
+                                   IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
+                      IXGBE_ITR_ADAPTIVE_MIN_INC;
                break;
-       case bulk_latency:
-               if (bytes_perint <= 20)
-                       itr_setting = low_latency;
+       case IXGBE_LINK_SPEED_2_5GB_FULL:
+       case IXGBE_LINK_SPEED_1GB_FULL:
+       case IXGBE_LINK_SPEED_10_FULL:
+               itr += DIV_ROUND_UP(avg_wire_size,
+                                   IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
+                      IXGBE_ITR_ADAPTIVE_MIN_INC;
                break;
        }
  
-       /* clear work counters since we have the values we need */
+ clear_counts:
+       /* write back value */
+       ring_container->itr = itr;
+       /* next update should occur within next jiffy */
+       ring_container->next_update = next_update + 1;
        ring_container->total_bytes = 0;
        ring_container->total_packets = 0;
-       /* write updated itr to ring container */
-       ring_container->itr = itr_setting;
  }
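
To make the crude approximation above concrete, here is a small stand-alone userspace sketch (illustrative only, not part of the driver) that evaluates the piecewise-linear table against the exact (170 * (size + 24)) / (size + 640) formula. Both sides are in the 256x-scaled units described in the comment, so shifting the table value right by 8 gives roughly the microseconds between interrupts, before the latency-mode halving and the link-speed scaling are applied:

     #include <stdio.h>

     /* piecewise-linear approximation lifted from ixgbe_update_itr() above */
     static unsigned int approx(unsigned int size)
     {
             if (size <= 60)
                     return 5120;                    /* ~50K ints/sec */
             if (size <= 316)
                     return size * 40 + 2720;        /* 50K..16K ints/sec */
             if (size <= 1084)
                     return size * 15 + 11452;       /* 16K..9.2K ints/sec */
             if (size <= 1980)
                     return size * 5 + 22420;        /* 9.2K..8K ints/sec */
             return 32256;                           /* plateau at ~8K ints/sec */
     }

     int main(void)
     {
             unsigned int sizes[] = { 60, 128, 316, 512, 1084, 1500, 1980, 9000 };
             unsigned int i;

             for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                     unsigned int s = sizes[i];

                     printf("avg size %5u: table %3u us, formula %3u us\n",
                            s, approx(s) >> 8, 170 * (s + 24) / (s + 640));
             }
             return 0;
     }
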
  
  /**
@@@ -2601,34 -2749,19 +2749,19 @@@ void ixgbe_write_eitr(struct ixgbe_q_ve
  
  static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
  {
-       u32 new_itr = q_vector->itr;
-       u8 current_itr;
+       u32 new_itr;
  
        ixgbe_update_itr(q_vector, &q_vector->tx);
        ixgbe_update_itr(q_vector, &q_vector->rx);
  
-       current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+       /* use the smallest value of new ITR delay calculations */
+       new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
  
-       switch (current_itr) {
-       /* counts and packets in update_itr are dependent on these numbers */
-       case lowest_latency:
-               new_itr = IXGBE_100K_ITR;
-               break;
-       case low_latency:
-               new_itr = IXGBE_20K_ITR;
-               break;
-       case bulk_latency:
-               new_itr = IXGBE_12K_ITR;
-               break;
-       default:
-               break;
-       }
+       /* Clear latency flag if set, shift into correct position */
+       new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
+       new_itr <<= 2;
  
        if (new_itr != q_vector->itr) {
-               /* do an exponential smoothing */
-               new_itr = (10 * new_itr * q_vector->itr) /
-                         ((9 * new_itr) + q_vector->itr);
                /* save the algorithm value here */
                q_vector->itr = new_itr;
  
@@@ -6771,6 -6904,7 +6904,7 @@@ void ixgbe_update_stats(struct ixgbe_ad
        u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
        u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
        u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+       u64 alloc_rx_page = 0;
        u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
  
        if (test_bit(__IXGBE_DOWN, &adapter->state) ||
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
                non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+               alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
                alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
                alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
                hw_csum_rx_error += rx_ring->rx_stats.csum_err;
                packets += rx_ring->stats.packets;
        }
        adapter->non_eop_descs = non_eop_descs;
+       adapter->alloc_rx_page = alloc_rx_page;
        adapter->alloc_rx_page_failed = alloc_rx_page_failed;
        adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
        adapter->hw_csum_rx_error = hw_csum_rx_error;
@@@ -7554,9 -7690,9 +7690,9 @@@ static void ixgbe_sfp_link_config_subta
   * ixgbe_service_timer - Timer Call-back
   * @t: pointer to the timer_list from which the adapter is looked up
   **/
- static void ixgbe_service_timer(unsigned long data)
+ static void ixgbe_service_timer(struct timer_list *t)
  {
-       struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data;
+       struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer);
        unsigned long next_event_offset;
  
        /* poll faster when waiting for link */
@@@ -8624,7 -8760,7 +8760,7 @@@ static void ixgbe_get_stats64(struct ne
  
        rcu_read_lock();
        for (i = 0; i < adapter->num_rx_queues; i++) {
 -              struct ixgbe_ring *ring = ACCESS_ONCE(adapter->rx_ring[i]);
 +              struct ixgbe_ring *ring = READ_ONCE(adapter->rx_ring[i]);
                u64 bytes, packets;
                unsigned int start;
  
        }
  
        for (i = 0; i < adapter->num_tx_queues; i++) {
 -              struct ixgbe_ring *ring = ACCESS_ONCE(adapter->tx_ring[i]);
 +              struct ixgbe_ring *ring = READ_ONCE(adapter->tx_ring[i]);
  
                ixgbe_get_ring_stats64(stats, ring);
        }
        for (i = 0; i < adapter->num_xdp_queues; i++) {
 -              struct ixgbe_ring *ring = ACCESS_ONCE(adapter->xdp_ring[i]);
 +              struct ixgbe_ring *ring = READ_ONCE(adapter->xdp_ring[i]);
  
                ixgbe_get_ring_stats64(stats, ring);
        }
@@@ -9223,13 -9359,10 +9359,10 @@@ free_jump
        return err;
  }
  
- static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
+ static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
                                  struct tc_cls_u32_offload *cls_u32)
  {
-       struct ixgbe_adapter *adapter = netdev_priv(dev);
-       if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
-           cls_u32->common.chain_index)
+       if (cls_u32->common.chain_index)
                return -EOPNOTSUPP;
  
        switch (cls_u32->command) {
        }
  }
  
+ static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+                                  void *cb_priv)
+ {
+       struct ixgbe_adapter *adapter = cb_priv;
+       if (!tc_can_offload(adapter->netdev))
+               return -EOPNOTSUPP;
+       switch (type) {
+       case TC_SETUP_CLSU32:
+               return ixgbe_setup_tc_cls_u32(adapter, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+ }
+ static int ixgbe_setup_tc_block(struct net_device *dev,
+                               struct tc_block_offload *f)
+ {
+       struct ixgbe_adapter *adapter = netdev_priv(dev);
+       if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+               return -EOPNOTSUPP;
+       switch (f->command) {
+       case TC_BLOCK_BIND:
+               return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
+                                            adapter, adapter);
+       case TC_BLOCK_UNBIND:
+               tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
+                                       adapter);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+ }
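
From user space, nothing changes for the existing u32 offload: hardware offload is still gated by 'ethtool -K <dev> hw-tc-offload on' (the tc_can_offload() check above), and filters are still attached on the ingress side, roughly along the lines of

     tc qdisc add dev eth0 ingress
     tc filter add dev eth0 parent ffff: protocol ip u32 ...

(the exact match/action syntax is illustrative). The difference is that the requests now arrive through the block callback registered in ixgbe_setup_tc_block() rather than through the old per-call classid check that was removed above.
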
  static int ixgbe_setup_tc_mqprio(struct net_device *dev,
                                 struct tc_mqprio_qopt *mqprio)
  {
@@@ -9259,9 -9429,9 +9429,9 @@@ static int __ixgbe_setup_tc(struct net_
                            void *type_data)
  {
        switch (type) {
-       case TC_SETUP_CLSU32:
-               return ixgbe_setup_tc_cls_u32(dev, type_data);
-       case TC_SETUP_MQPRIO:
+       case TC_SETUP_BLOCK:
+               return ixgbe_setup_tc_block(dev, type_data);
+       case TC_SETUP_QDISC_MQPRIO:
                return ixgbe_setup_tc_mqprio(dev, type_data);
        default:
                return -EOPNOTSUPP;
@@@ -9733,6 -9903,17 +9903,17 @@@ static void ixgbe_fwd_del(struct net_de
        limit = find_last_bit(&adapter->fwd_bitmask, 32);
        adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
        ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter);
+       /* go back to full RSS if we're done with our VMQs */
+       if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+               int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+                               num_online_cpus());
+               adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
+               adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
+               adapter->ring_feature[RING_F_RSS].limit = rss;
+       }
        ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev));
        netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n",
                   fwd_adapter->pool, adapter->num_rx_pools,
@@@ -9823,7 -10004,7 +10004,7 @@@ static int ixgbe_xdp_setup(struct net_d
        return 0;
  }
  
- static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  {
        struct ixgbe_adapter *adapter = netdev_priv(dev);
  
@@@ -9932,7 -10113,7 +10113,7 @@@ static const struct net_device_ops ixgb
        .ndo_udp_tunnel_add     = ixgbe_add_udp_tunnel_port,
        .ndo_udp_tunnel_del     = ixgbe_del_udp_tunnel_port,
        .ndo_features_check     = ixgbe_features_check,
-       .ndo_xdp                = ixgbe_xdp,
+       .ndo_bpf                = ixgbe_xdp,
        .ndo_xdp_xmit           = ixgbe_xdp_xmit,
        .ndo_xdp_flush          = ixgbe_xdp_flush,
  };
@@@ -10355,8 -10536,7 +10536,7 @@@ skip_sriov
        ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
        ixgbe_mac_set_default_filter(adapter);
  
-       setup_timer(&adapter->service_timer, &ixgbe_service_timer,
-                   (unsigned long) adapter);
+       timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
  
        if (ixgbe_removed(hw->hw_addr)) {
                err = -EIO;
@@@ -10711,6 -10891,9 +10891,9 @@@ skip_bad_vf_detection
  #endif /* CONFIG_PCI_IOV */
        if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
                return PCI_ERS_RESULT_DISCONNECT;
+       if (!netif_device_present(netdev))
+               return PCI_ERS_RESULT_DISCONNECT;
  
        rtnl_lock();
        netif_device_detach(netdev);
index cacb30682434b8685a0102fa51b883fe08caf547,12d3601b1d57f9fa977acd54c646e81c3d1b9048..feed11bc9ddffdf7b779abab19a7e3e678909ecd
@@@ -164,7 -164,7 +164,7 @@@ static void ixgbevf_check_remove(struc
  
  u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg)
  {
 -      u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr);
 +      u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
        u32 value;
  
        if (IXGBE_REMOVED(reg_addr))
@@@ -2747,9 -2747,10 +2747,10 @@@ void ixgbevf_update_stats(struct ixgbev
   * ixgbevf_service_timer - Timer Call-back
   * @t: pointer to the timer_list from which the adapter is looked up
   **/
- static void ixgbevf_service_timer(unsigned long data)
+ static void ixgbevf_service_timer(struct timer_list *t)
  {
-       struct ixgbevf_adapter *adapter = (struct ixgbevf_adapter *)data;
+       struct ixgbevf_adapter *adapter = from_timer(adapter, t,
+                                                    service_timer);
  
        /* Reset the timer */
        mod_timer(&adapter->service_timer, (HZ * 2) + jiffies);
@@@ -4120,8 -4121,7 +4121,7 @@@ static int ixgbevf_probe(struct pci_de
                goto err_sw_init;
        }
  
-       setup_timer(&adapter->service_timer, &ixgbevf_service_timer,
-                   (unsigned long)adapter);
+       timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0);
  
        INIT_WORK(&adapter->service_task, ixgbevf_service_task);
        set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state);
index 3541a7f9d12e5a06f924f80d0fee20c355d1204a,596445a4a241419737596b51fb38d00b02ad6681..6b68537738480eb649b962647fe8b52c65f4cb32
@@@ -414,8 -414,8 +414,8 @@@ bool mlx4_en_process_tx_cq(struct net_d
  
        index = cons_index & size_mask;
        cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 -      last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb);
 -      ring_cons = ACCESS_ONCE(ring->cons);
 +      last_nr_txbb = READ_ONCE(ring->last_nr_txbb);
 +      ring_cons = READ_ONCE(ring->cons);
        ring_index = ring_cons & size_mask;
        stamp_index = ring_index;
  
        wmb();
  
        /* we want to dirty this cache line once */
 -      ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
 -      ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
 +      WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb);
 +      WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
  
        if (cq->type == TX_XDP)
                return done < budget;
@@@ -718,7 -718,7 +718,7 @@@ void mlx4_en_xmit_doorbell(struct mlx4_
  #else
        iowrite32be(
  #endif
-                 ring->doorbell_qpn,
+                 (__force u32)ring->doorbell_qpn,
                  ring->bf.uar->map + MLX4_SEND_DOORBELL);
  }
  
@@@ -858,7 -858,7 +858,7 @@@ netdev_tx_t mlx4_en_xmit(struct sk_buf
                goto tx_drop;
  
        /* fetch ring->cons far ahead before needing it to avoid stall */
 -      ring_cons = ACCESS_ONCE(ring->cons);
 +      ring_cons = READ_ONCE(ring->cons);
  
        real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
                                  &inline_ok, &fragptr);
                 */
                smp_rmb();
  
 -              ring_cons = ACCESS_ONCE(ring->cons);
 +              ring_cons = READ_ONCE(ring->cons);
                if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
                        netif_tx_wake_queue(ring->tx_queue);
                        ring->wake_queue++;
@@@ -1085,13 -1085,35 +1085,35 @@@ tx_drop
  #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
                                 / 16) & 0x3f)
  
+ void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+                                   struct mlx4_en_tx_ring *ring)
+ {
+       int i;
+       for (i = 0; i < ring->size; i++) {
+               struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
+               struct mlx4_en_tx_desc *tx_desc = ring->buf +
+                       (i << LOG_TXBB_SIZE);
+               tx_info->map0_byte_count = PAGE_SIZE;
+               tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
+               tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
+               tx_info->ts_requested = 0;
+               tx_info->nr_maps = 1;
+               tx_info->linear = 1;
+               tx_info->inl = 0;
+               tx_desc->data.lkey = ring->mr_key;
+               tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+               tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+       }
+ }
  netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
                               struct mlx4_en_rx_alloc *frame,
-                              struct net_device *dev, unsigned int length,
+                              struct mlx4_en_priv *priv, unsigned int length,
                               int tx_ind, bool *doorbell_pending)
  {
-       struct mlx4_en_priv *priv = netdev_priv(dev);
-       union mlx4_wqe_qpn_vlan qpn_vlan = {};
        struct mlx4_en_tx_desc *tx_desc;
        struct mlx4_en_tx_info *tx_info;
        struct mlx4_wqe_data_seg *data;
        tx_info->page = frame->page;
        frame->page = NULL;
        tx_info->map0_dma = dma;
-       tx_info->map0_byte_count = PAGE_SIZE;
-       tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
        tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
-       tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
-       tx_info->ts_requested = 0;
-       tx_info->nr_maps = 1;
-       tx_info->linear = 1;
-       tx_info->inl = 0;
  
        dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
                                         length, PCI_DMA_TODEVICE);
  
        data->addr = cpu_to_be64(dma + frame->page_offset);
-       data->lkey = ring->mr_key;
        dma_wmb();
        data->byte_count = cpu_to_be32(length);
  
        /* tx completion can avoid cache line miss for common cases */
-       tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
  
        op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
                ((ring->prod & ring->size) ?
  
        ring->prod += MLX4_EN_XDP_TX_NRTXBB;
  
-       qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+       /* Ensure new descriptor hits memory
+        * before setting ownership of this descriptor to HW
+        */
+       dma_wmb();
+       tx_desc->ctrl.owner_opcode = op_own;
+       ring->xmit_more++;
  
-       mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
-                             op_own, false, false);
        *doorbell_pending = true;
  
        return NETDEV_TX_OK;
index 5dd5f61e1114bd1f7870fd1b34ed32d002a4d427,426c9a946eb4f9f372ca138c640351c78be9016d..fe7e0e1dd01def224bd1932483170e04d488ff72
@@@ -1122,7 -1122,6 +1122,6 @@@ static void vxge_set_multicast(struct n
        struct netdev_hw_addr *ha;
        struct vxgedev *vdev;
        int i, mcast_cnt = 0;
-       struct __vxge_hw_device *hldev;
        struct vxge_vpath *vpath;
        enum vxge_hw_status status = VXGE_HW_OK;
        struct macInfo mac_info;
                "%s:%d", __func__, __LINE__);
  
        vdev = netdev_priv(dev);
-       hldev = vdev->devh;
  
        if (unlikely(!is_vxge_card_up(vdev)))
                return;
@@@ -1283,7 -1281,6 +1281,6 @@@ static int vxge_set_mac_addr(struct net
  {
        struct sockaddr *addr = p;
        struct vxgedev *vdev;
-       struct __vxge_hw_device *hldev;
        enum vxge_hw_status status = VXGE_HW_OK;
        struct macInfo mac_info_new, mac_info_old;
        int vpath_idx = 0;
        vxge_debug_entryexit(VXGE_TRACE, "%s:%d", __func__, __LINE__);
  
        vdev = netdev_priv(dev);
-       hldev = vdev->devh;
  
        if (!is_valid_ether_addr(addr->sa_data))
                return -EINVAL;
@@@ -2177,7 -2173,6 +2173,6 @@@ static void adaptive_coalesce_rx_interr
   */
  static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
  {
-       struct net_device *dev;
        struct __vxge_hw_device *hldev;
        u64 reason;
        enum vxge_hw_status status;
  
        vxge_debug_intr(VXGE_TRACE, "%s:%d", __func__, __LINE__);
  
-       dev = vdev->ndev;
        hldev = pci_get_drvdata(vdev->pdev);
  
        if (pci_channel_offline(vdev->pdev))
@@@ -2597,9 -2591,9 +2591,9 @@@ INTA_MODE
        return VXGE_HW_OK;
  }
  
- static void vxge_poll_vp_reset(unsigned long data)
+ static void vxge_poll_vp_reset(struct timer_list *t)
  {
-       struct vxgedev *vdev = (struct vxgedev *)data;
+       struct vxgedev *vdev = from_timer(vdev, t, vp_reset_timer);
        int i, j = 0;
  
        for (i = 0; i < vdev->no_of_vpath; i++) {
        mod_timer(&vdev->vp_reset_timer, jiffies + HZ / 2);
  }
  
- static void vxge_poll_vp_lockup(unsigned long data)
+ static void vxge_poll_vp_lockup(struct timer_list *t)
  {
-       struct vxgedev *vdev = (struct vxgedev *)data;
+       struct vxgedev *vdev = from_timer(vdev, t, vp_lockup_timer);
        enum vxge_hw_status status = VXGE_HW_OK;
        struct vxge_vpath *vpath;
        struct vxge_ring *ring;
                ring = &vdev->vpaths[i].ring;
  
                /* Truncated to machine word size number of frames */
 -              rx_frms = ACCESS_ONCE(ring->stats.rx_frms);
 +              rx_frms = READ_ONCE(ring->stats.rx_frms);
  
                /* Did this vpath received any packets */
                if (ring->stats.prev_rx_frms == rx_frms) {
@@@ -2713,14 -2707,13 +2707,13 @@@ static int vxge_open(struct net_device 
        struct vxge_vpath *vpath;
        int ret = 0;
        int i;
-       u64 val64, function_mode;
+       u64 val64;
  
        vxge_debug_entryexit(VXGE_TRACE,
                "%s: %s:%d", dev->name, __func__, __LINE__);
  
        vdev = netdev_priv(dev);
        hldev = pci_get_drvdata(vdev->pdev);
-       function_mode = vdev->config.device_hw_info.function_mode;
  
        /* make sure you have link off by default every time Nic is
         * initialized */
                vdev->config.rx_pause_enable);
  
        if (vdev->vp_reset_timer.function == NULL)
-               vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, vdev,
+               vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset,
                              HZ / 2);
  
        /* There is no need to check for RxD leak and RxD lookup on Titan1A */
        if (vdev->titan1 && vdev->vp_lockup_timer.function == NULL)
-               vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, vdev,
+               vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup,
                              HZ / 2);
  
        set_bit(__VXGE_STATE_CARD_UP, &vdev->state);
index a95a46bcd339d824f442170b47f335c630c50b6b,46d60013564c9a7d4c2038a28e02867a766a2171..e566dbb3343d7cfbd4b244fbb534c09deb3a74f4
@@@ -674,6 -674,10 +674,10 @@@ static int efx_ef10_probe(struct efx_ni
        efx->rx_packet_len_offset =
                ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE;
  
+       if (nic_data->datapath_caps &
+           (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_INCLUDE_FCS_LBN))
+               efx->net_dev->hw_features |= NETIF_F_RXFCS;
        rc = efx_mcdi_port_get_number(efx);
        if (rc < 0)
                goto fail5;
@@@ -2073,7 -2077,7 +2077,7 @@@ static irqreturn_t efx_ef10_msi_interru
        netif_vdbg(efx, intr, efx->net_dev,
                   "IRQ %d on CPU %d\n", irq, raw_smp_processor_id());
  
 -      if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) {
 +      if (likely(READ_ONCE(efx->irq_soft_enabled))) {
                /* Note test interrupts */
                if (context->index == efx->irq_level)
                        efx->last_irq_cpu = raw_smp_processor_id();
  static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id)
  {
        struct efx_nic *efx = dev_id;
 -      bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
 +      bool soft_enabled = READ_ONCE(efx->irq_soft_enabled);
        struct efx_channel *channel;
        efx_dword_t reg;
        u32 queues;
@@@ -3199,11 -3203,15 +3203,15 @@@ static u16 efx_ef10_handle_rx_event_err
                                           const efx_qword_t *event)
  {
        struct efx_nic *efx = channel->efx;
+       bool handled = false;
  
        if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)) {
-               if (!efx->loopback_selftest)
-                       channel->n_rx_eth_crc_err += n_packets;
-               return EFX_RX_PKT_DISCARD;
+               if (!(efx->net_dev->features & NETIF_F_RXALL)) {
+                       if (!efx->loopback_selftest)
+                               channel->n_rx_eth_crc_err += n_packets;
+                       return EFX_RX_PKT_DISCARD;
+               }
+               handled = true;
        }
        if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR)) {
                if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN &&
                return 0;
        }
  
-       WARN_ON(1); /* No error bits were recognised */
+       WARN_ON(!handled); /* No error bits were recognised */
        return 0;
  }
  
@@@ -3291,7 -3299,7 +3299,7 @@@ static int efx_ef10_handle_rx_event(str
        bool rx_cont;
        u16 flags = 0;
  
 -      if (unlikely(ACCESS_ONCE(efx->reset_pending)))
 +      if (unlikely(READ_ONCE(efx->reset_pending)))
                return 0;
  
        /* Basic packet information */
@@@ -3428,7 -3436,7 +3436,7 @@@ efx_ef10_handle_tx_event(struct efx_cha
        unsigned int tx_ev_q_label;
        int tx_descs = 0;
  
 -      if (unlikely(ACCESS_ONCE(efx->reset_pending)))
 +      if (unlikely(READ_ONCE(efx->reset_pending)))
                return 0;
  
        if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT)))
@@@ -5316,7 -5324,7 +5324,7 @@@ static void efx_ef10_filter_remove_old(
        int i;
  
        for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
 -              if (ACCESS_ONCE(table->entry[i].spec) &
 +              if (READ_ONCE(table->entry[i].spec) &
                    EFX_EF10_FILTER_FLAG_AUTO_OLD) {
                        rc = efx_ef10_filter_remove_internal(efx,
                                        1U << EFX_FILTER_PRI_AUTO, i, true);
@@@ -5726,7 -5734,7 +5734,7 @@@ static int efx_ef10_set_mac_address(str
                 * MCFW do not support VFs.
                 */
                rc = efx_ef10_vport_set_mac_address(efx);
-       } else {
+       } else if (rc) {
                efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC,
                                       sizeof(inbuf), NULL, 0, rc);
        }
index 016616a6388057c7107196ae7521543785a5c678,6668e371405c9f8680fa360de03c1cee19a9004c..e3c492fcaff07d9c6b3c5211609c3bd0bde0eac4
@@@ -471,8 -471,7 +471,7 @@@ efx_alloc_channel(struct efx_nic *efx, 
  
        rx_queue = &channel->rx_queue;
        rx_queue->efx = efx;
-       setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
-                   (unsigned long)rx_queue);
+       timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
  
        return channel;
  }
@@@ -511,8 -510,7 +510,7 @@@ efx_copy_channel(const struct efx_chann
        rx_queue = &channel->rx_queue;
        rx_queue->buffer = NULL;
        memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
-       setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
-                   (unsigned long)rx_queue);
+       timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
  
        return channel;
  }
@@@ -2317,8 -2315,11 +2315,11 @@@ static int efx_set_features(struct net_
                        return rc;
        }
  
-       /* If Rx VLAN filter is changed, update filters via mac_reconfigure */
-       if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) {
+       /* If Rx VLAN filter is changed, update filters via mac_reconfigure.
+        * If rx-fcs is changed, mac_reconfigure updates that too.
+        */
+       if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
+                                         NETIF_F_RXFCS)) {
                /* efx_set_rx_mode() will schedule MAC work to update filters
                 * when the new features are finally set in net_dev.
                 */
@@@ -2809,7 -2810,7 +2810,7 @@@ static void efx_reset_work(struct work_
        unsigned long pending;
        enum reset_type method;
  
 -      pending = ACCESS_ONCE(efx->reset_pending);
 +      pending = READ_ONCE(efx->reset_pending);
        method = fls(pending) - 1;
  
        if (method == RESET_TYPE_MC_BIST)
@@@ -2874,7 -2875,7 +2875,7 @@@ void efx_schedule_reset(struct efx_nic 
        /* If we're not READY then just leave the flags set as the cue
         * to abort probing or reschedule the reset later.
         */
 -      if (ACCESS_ONCE(efx->state) != STATE_READY)
 +      if (READ_ONCE(efx->state) != STATE_READY)
                return;
  
        /* efx_process_channel() will no longer read events once a
@@@ -3244,7 -3245,7 +3245,7 @@@ static int efx_pci_probe_post_io(struc
  
        /* Determine netdevice features */
        net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
-                             NETIF_F_TSO | NETIF_F_RXCSUM);
+                             NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
        if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                net_dev->features |= NETIF_F_TSO6;
        /* Check whether device supports TSO */
                                   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
                                   NETIF_F_RXCSUM);
  
-       net_dev->hw_features = net_dev->features & ~efx->fixed_features;
+       net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
+       /* Disable receiving frames with bad FCS, by default. */
+       net_dev->features &= ~NETIF_F_RXALL;
  
        /* Disable VLAN filtering by default.  It may be enforced if
         * the feature is fixed (i.e. VLAN filters are required to
index 7263275fde4a1d6eb27d7bd3d358904b2a66313a,6685a66ee1a3b85624414f65a8c8f24a0994b87d..3d6c91e96589870ca540331350ceae7563e163c5
@@@ -449,8 -449,7 +449,7 @@@ ef4_alloc_channel(struct ef4_nic *efx, 
  
        rx_queue = &channel->rx_queue;
        rx_queue->efx = efx;
-       setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
-                   (unsigned long)rx_queue);
+       timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0);
  
        return channel;
  }
@@@ -489,8 -488,7 +488,7 @@@ ef4_copy_channel(const struct ef4_chann
        rx_queue = &channel->rx_queue;
        rx_queue->buffer = NULL;
        memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
-       setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
-                   (unsigned long)rx_queue);
+       timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0);
  
        return channel;
  }
@@@ -2545,7 -2543,7 +2543,7 @@@ static void ef4_reset_work(struct work_
        unsigned long pending;
        enum reset_type method;
  
 -      pending = ACCESS_ONCE(efx->reset_pending);
 +      pending = READ_ONCE(efx->reset_pending);
        method = fls(pending) - 1;
  
        if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
@@@ -2605,7 -2603,7 +2603,7 @@@ void ef4_schedule_reset(struct ef4_nic 
        /* If we're not READY then just leave the flags set as the cue
         * to abort probing or reschedule the reset later.
         */
 -      if (ACCESS_ONCE(efx->state) != STATE_READY)
 +      if (READ_ONCE(efx->state) != STATE_READY)
                return;
  
        queue_work(reset_workqueue, &efx->reset_work);
index cd8bb472d75813773e645b6bf0e1c196523a38d0,ccda017b6794525f3043655e72171b37f79ef534..6520d7bc8d211755e44d1900e1ff1dbce2d0d5d7
@@@ -452,7 -452,7 +452,7 @@@ static irqreturn_t falcon_legacy_interr
                   "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n",
                   irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker));
  
 -      if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
 +      if (!likely(READ_ONCE(efx->irq_soft_enabled)))
                return IRQ_HANDLED;
  
        /* Check to see if we have a serious error condition */
@@@ -1372,7 -1372,7 +1372,7 @@@ static void falcon_reconfigure_mac_wrap
        ef4_oword_t reg;
        int link_speed, isolate;
  
 -      isolate = !!ACCESS_ONCE(efx->reset_pending);
 +      isolate = !!READ_ONCE(efx->reset_pending);
  
        switch (link_state->speed) {
        case 10000: link_speed = 3; break;
@@@ -1454,10 -1454,11 +1454,11 @@@ static void falcon_stats_complete(struc
        }
  }
  
- static void falcon_stats_timer_func(unsigned long context)
+ static void falcon_stats_timer_func(struct timer_list *t)
  {
-       struct ef4_nic *efx = (struct ef4_nic *)context;
-       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct falcon_nic_data *nic_data = from_timer(nic_data, t,
+                                                     stats_timer);
+       struct ef4_nic *efx = nic_data->efx;
  
        spin_lock(&efx->stats_lock);
  
@@@ -2295,6 -2296,7 +2296,7 @@@ static int falcon_probe_nic(struct ef4_
        if (!nic_data)
                return -ENOMEM;
        efx->nic_data = nic_data;
+       nic_data->efx = efx;
  
        rc = -ENODEV;
  
        }
  
        nic_data->stats_disable_count = 1;
-       setup_timer(&nic_data->stats_timer, &falcon_stats_timer_func,
-                   (unsigned long)efx);
+       timer_setup(&nic_data->stats_timer, falcon_stats_timer_func, 0);
  
        return 0;
  
index 54ca457cdb15dc79f0d5175c83d530ed3c3440a9,e2e3c008d0738287a67bc0fc8883ba9bb194d4cb..07c62dc552cb923749edba62c9ac3399cdc04681
@@@ -83,7 -83,7 +83,7 @@@ static inline struct ef4_tx_queue *ef4_
  static inline bool __ef4_nic_tx_is_empty(struct ef4_tx_queue *tx_queue,
                                         unsigned int write_count)
  {
 -      unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count);
 +      unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count);
  
        if (empty_read_count == 0)
                return false;
@@@ -267,6 -267,7 +267,7 @@@ enum 
  /**
   * struct falcon_nic_data - Falcon NIC state
   * @pci_dev2: Secondary function of Falcon A
+  * @efx: ef4_nic pointer
   * @board: Board state and functions
   * @stats: Hardware statistics
   * @stats_disable_count: Nest count for disabling statistics fetches
   */
  struct falcon_nic_data {
        struct pci_dev *pci_dev2;
+       struct ef4_nic *efx;
        struct falcon_board board;
        u64 stats[FALCON_STAT_COUNT];
        unsigned int stats_disable_count;
@@@ -464,11 -466,11 +466,11 @@@ irqreturn_t ef4_farch_fatal_interrupt(s
  
  static inline int ef4_nic_event_test_irq_cpu(struct ef4_channel *channel)
  {
 -      return ACCESS_ONCE(channel->event_test_cpu);
 +      return READ_ONCE(channel->event_test_cpu);
  }
  static inline int ef4_nic_irq_test_irq_cpu(struct ef4_nic *efx)
  {
 -      return ACCESS_ONCE(efx->last_irq_cpu);
 +      return READ_ONCE(efx->last_irq_cpu);
  }
  
  /* Global Resources */
index 6486814e97dccee08431dea134739d2840c6c991,1b978d69e702467dfdb9166157fc6d6daecdf3c8..3409bbf5b19fffbc5ec3538e592173f854eb8791
@@@ -134,8 -134,8 +134,8 @@@ static void ef4_tx_maybe_stop_queue(str
         */
        netif_tx_stop_queue(txq1->core_txq);
        smp_mb();
 -      txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
 -      txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
 +      txq1->old_read_count = READ_ONCE(txq1->read_count);
 +      txq2->old_read_count = READ_ONCE(txq2->read_count);
  
        fill_level = max(txq1->insert_count - txq1->old_read_count,
                         txq2->insert_count - txq2->old_read_count);
@@@ -435,7 -435,7 +435,7 @@@ int ef4_setup_tc(struct net_device *net
        unsigned tc, num_tc;
        int rc;
  
-       if (type != TC_SETUP_MQPRIO)
+       if (type != TC_SETUP_QDISC_MQPRIO)
                return -EOPNOTSUPP;
  
        num_tc = mqprio->num_tc;
@@@ -524,7 -524,7 +524,7 @@@ void ef4_xmit_done(struct ef4_tx_queue 
  
        /* Check whether the hardware queue is now empty */
        if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
 -              tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
 +              tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
                if (tx_queue->read_count == tx_queue->old_write_count) {
                        smp_mb();
                        tx_queue->empty_read_count =
index 86454d25a405ecbcdd3dd604b963659e0d128541,6608dfe455b17beb87eb79bfbb6438b18b22a71b..5334dc83d926024e854420f8075d1b9e0a5af04c
@@@ -827,7 -827,7 +827,7 @@@ efx_farch_handle_tx_event(struct efx_ch
        struct efx_nic *efx = channel->efx;
        int tx_packets = 0;
  
 -      if (unlikely(ACCESS_ONCE(efx->reset_pending)))
 +      if (unlikely(READ_ONCE(efx->reset_pending)))
                return 0;
  
        if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
@@@ -927,6 -927,10 +927,10 @@@ static u16 efx_farch_handle_rx_not_ok(s
        }
  #endif
  
+       if (efx->net_dev->features & NETIF_F_RXALL)
+               /* don't discard frame for CRC error */
+               rx_ev_eth_crc_err = false;
        /* The frame must be discarded if any of these are true. */
        return (rx_ev_eth_crc_err | rx_ev_frm_trunc |
                rx_ev_tobe_disc | rx_ev_pause_frm) ?
@@@ -979,7 -983,7 +983,7 @@@ efx_farch_handle_rx_event(struct efx_ch
        struct efx_rx_queue *rx_queue;
        struct efx_nic *efx = channel->efx;
  
 -      if (unlikely(ACCESS_ONCE(efx->reset_pending)))
 +      if (unlikely(READ_ONCE(efx->reset_pending)))
                return;
  
        rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
@@@ -1520,7 -1524,7 +1524,7 @@@ irqreturn_t efx_farch_fatal_interrupt(s
  irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id)
  {
        struct efx_nic *efx = dev_id;
 -      bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
 +      bool soft_enabled = READ_ONCE(efx->irq_soft_enabled);
        efx_oword_t *int_ker = efx->irq_status.addr;
        irqreturn_t result = IRQ_NONE;
        struct efx_channel *channel;
@@@ -1612,7 -1616,7 +1616,7 @@@ irqreturn_t efx_farch_msi_interrupt(in
                   "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
                   irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
  
 -      if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
 +      if (!likely(READ_ONCE(efx->irq_soft_enabled)))
                return IRQ_HANDLED;
  
        /* Handle non-event-queue sources */
index 56c2db398deff5f250f8cb729618daa61aa58cc7,4f54245df0ec0dd685895059e87a6d9cd1abe958..caa89bf7603e398d955ff929c94284dc66d357fb
@@@ -648,17 -648,15 +648,15 @@@ static void efx_ptp_send_times(struct e
        struct pps_event_time now;
        struct timespec64 limit;
        struct efx_ptp_data *ptp = efx->ptp_data;
-       struct timespec64 start;
        int *mc_running = ptp->start.addr;
  
        pps_get_ts(&now);
-       start = now.ts_real;
        limit = now.ts_real;
        timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS);
  
        /* Write host time for specified period or until MC is done */
        while ((timespec64_compare(&now.ts_real, &limit) < 0) &&
 -             ACCESS_ONCE(*mc_running)) {
 +             READ_ONCE(*mc_running)) {
                struct timespec64 update_time;
                unsigned int host_time;
  
                do {
                        pps_get_ts(&now);
                } while ((timespec64_compare(&now.ts_real, &update_time) < 0) &&
 -                       ACCESS_ONCE(*mc_running));
 +                       READ_ONCE(*mc_running));
  
                /* Synchronise NIC with single word of time only */
                host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS |
@@@ -832,14 -830,14 +830,14 @@@ static int efx_ptp_synchronize(struct e
                       ptp->start.dma_addr);
  
        /* Clear flag that signals MC ready */
 -      ACCESS_ONCE(*start) = 0;
 +      WRITE_ONCE(*start, 0);
        rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf,
                                MC_CMD_PTP_IN_SYNCHRONIZE_LEN);
        EFX_WARN_ON_ONCE_PARANOID(rc);
  
        /* Wait for start from MCDI (or timeout) */
        timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS);
 -      while (!ACCESS_ONCE(*start) && (time_before(jiffies, timeout))) {
 +      while (!READ_ONCE(*start) && (time_before(jiffies, timeout))) {
                udelay(20);     /* Usually start MCDI execution quickly */
                loops++;
        }
        if (!time_before(jiffies, timeout))
                ++ptp->sync_timeouts;
  
 -      if (ACCESS_ONCE(*start))
 +      if (READ_ONCE(*start))
                efx_ptp_send_times(efx, &last_time);
  
        /* Collect results */
index efb66ea21f27d3d8fd458bba54bdd046a1a936a2,ea27b8a7f46502e72b48d5c02024adfa8f965d9c..0ea7e16f2e6e2c6d8106308e73327390e62074ce
@@@ -136,8 -136,8 +136,8 @@@ static void efx_tx_maybe_stop_queue(str
         */
        netif_tx_stop_queue(txq1->core_txq);
        smp_mb();
 -      txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
 -      txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
 +      txq1->old_read_count = READ_ONCE(txq1->read_count);
 +      txq2->old_read_count = READ_ONCE(txq2->read_count);
  
        fill_level = max(txq1->insert_count - txq1->old_read_count,
                         txq2->insert_count - txq2->old_read_count);
@@@ -663,7 -663,7 +663,7 @@@ int efx_setup_tc(struct net_device *net
        unsigned tc, num_tc;
        int rc;
  
-       if (type != TC_SETUP_MQPRIO)
+       if (type != TC_SETUP_QDISC_MQPRIO)
                return -EOPNOTSUPP;
  
        num_tc = mqprio->num_tc;
@@@ -752,7 -752,7 +752,7 @@@ void efx_xmit_done(struct efx_tx_queue 
  
        /* Check whether the hardware queue is now empty */
        if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
 -              tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
 +              tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
                if (tx_queue->read_count == tx_queue->old_write_count) {
                        smp_mb();
                        tx_queue->empty_read_count =
index 8ab0fb6892d5d3562e891d574331d816acb69a51,ab502ee35fb29cbcd886355cbeeecb7d04532cc2..06001bacbe0fe8e6f648168b47aacff6c7a4d31a
@@@ -2221,9 -2221,9 +2221,9 @@@ static int niu_link_status(struct niu *
        return err;
  }
  
- static void niu_timer(unsigned long __opaque)
+ static void niu_timer(struct timer_list *t)
  {
-       struct niu *np = (struct niu *) __opaque;
+       struct niu *np = from_timer(np, t, timer);
        unsigned long off;
        int err, link_up;
  
@@@ -6123,10 -6123,8 +6123,8 @@@ static int niu_open(struct net_device *
  
        err = niu_init_hw(np);
        if (!err) {
-               init_timer(&np->timer);
+               timer_setup(&np->timer, niu_timer, 0);
                np->timer.expires = jiffies + HZ;
-               np->timer.data = (unsigned long) np;
-               np->timer.function = niu_timer;
  
                err = niu_enable_interrupts(np, 1);
                if (err)
@@@ -6245,7 -6243,7 +6243,7 @@@ static void niu_get_rx_stats(struct ni
  
        pkts = dropped = errors = bytes = 0;
  
 -      rx_rings = ACCESS_ONCE(np->rx_rings);
 +      rx_rings = READ_ONCE(np->rx_rings);
        if (!rx_rings)
                goto no_rings;
  
@@@ -6276,7 -6274,7 +6274,7 @@@ static void niu_get_tx_stats(struct ni
  
        pkts = errors = bytes = 0;
  
 -      tx_rings = ACCESS_ONCE(np->tx_rings);
 +      tx_rings = READ_ONCE(np->tx_rings);
        if (!tx_rings)
                goto no_rings;
  
@@@ -6775,10 -6773,8 +6773,8 @@@ static int niu_change_mtu(struct net_de
  
        err = niu_init_hw(np);
        if (!err) {
-               init_timer(&np->timer);
+               timer_setup(&np->timer, niu_timer, 0);
                np->timer.expires = jiffies + HZ;
-               np->timer.data = (unsigned long) np;
-               np->timer.function = niu_timer;
  
                err = niu_enable_interrupts(np, 1);
                if (err)
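
The niu hunks follow the standard timer API conversion in this cycle: the callback now takes a struct timer_list * and recovers its container with from_timer(), while timer_setup() replaces init_timer() plus the open-coded .data/.function assignments. A generic sketch of the converted shape (struct foo and foo_timer() are placeholders, not the niu names):

	#include <linux/timer.h>

	struct foo {
		struct timer_list timer;
		int state;
	};

	static void foo_timer(struct timer_list *t)
	{
		struct foo *fp = from_timer(fp, t, timer);	/* container_of helper */

		fp->state++;					/* periodic work */
		mod_timer(&fp->timer, jiffies + HZ);		/* re-arm in one second */
	}

	static void foo_start(struct foo *fp)
	{
		timer_setup(&fp->timer, foo_timer, 0);	/* replaces init_timer() and
							 * the .data/.function stores */
		fp->timer.expires = jiffies + HZ;
		add_timer(&fp->timer);
	}
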
index 104f71fa9c5ed342df1988dc86d5389e1c49673b,b88c5cc00a6320ae3e241c8afcd569ec2d9e73e7..14c3632b8cde3cc95a25d98bc3ff826ed529205e
@@@ -157,7 -157,7 +157,7 @@@ static struct net_device *yam_devs[NR_P
  
  static struct yam_mcs *yam_data;
  
 -static DEFINE_TIMER(yam_timer, NULL, 0, 0);
 +static DEFINE_TIMER(yam_timer, NULL);
  
  /* --------------------------------------------------------------------- */
  
@@@ -647,7 -647,7 +647,7 @@@ static void yam_arbitrate(struct net_de
        yam_start_tx(dev, yp);
  }
  
- static void yam_dotimer(unsigned long dummy)
+ static void yam_dotimer(struct timer_list *unused)
  {
        int i;
  
@@@ -1164,7 -1164,7 +1164,7 @@@ static int __init yam_init_driver(void
  
        }
  
-       yam_timer.function = yam_dotimer;
+       timer_setup(&yam_timer, yam_dotimer, 0);
        yam_timer.expires = jiffies + HZ / 100;
        add_timer(&yam_timer);
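
For statically allocated timers the same conversion drops the .data and flags arguments from DEFINE_TIMER(), as the yam hunk shows; the callback can also be attached later with timer_setup() on the existing object. A small sketch, with my_timer and my_timer_fn as illustrative names:

	#include <linux/timer.h>

	static void my_timer_fn(struct timer_list *unused);
	static DEFINE_TIMER(my_timer, my_timer_fn);	/* just name + callback now */

	static void my_timer_fn(struct timer_list *unused)
	{
		/* periodic work would go here */
		mod_timer(&my_timer, jiffies + HZ / 100);	/* re-arm, roughly 10 ms */
	}
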
  
diff --combined drivers/net/tun.c
index c1685a6d788360beb3a1a0b8cf3a01efe157618d,1a326b69722189c60366531c4635ba4f2fb66c99..6bb1e604aadd68b6060df277491478899ccc7e83
@@@ -75,6 -75,7 +75,7 @@@
  #include <linux/skb_array.h>
  #include <linux/bpf.h>
  #include <linux/bpf_trace.h>
+ #include <linux/mutex.h>
  
  #include <linux/uaccess.h>
  
@@@ -121,7 -122,8 +122,8 @@@ do {                                                               
  #define TUN_VNET_BE     0x40000000
  
  #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
-                     IFF_MULTI_QUEUE)
+                     IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
  #define GOODCOPY_LEN 128
  
  #define FLT_EXACT_COUNT 8
@@@ -172,6 -174,9 +174,9 @@@ struct tun_file 
                u16 queue_index;
                unsigned int ifindex;
        };
+       struct napi_struct napi;
+       bool napi_enabled;
+       struct mutex napi_mutex;        /* Protects access to the above napi */
        struct list_head next;
        struct tun_struct *detached;
        struct skb_array tx_array;
@@@ -229,6 -234,75 +234,75 @@@ struct tun_struct 
        struct bpf_prog __rcu *xdp_prog;
  };
  
+ static int tun_napi_receive(struct napi_struct *napi, int budget)
+ {
+       struct tun_file *tfile = container_of(napi, struct tun_file, napi);
+       struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+       struct sk_buff_head process_queue;
+       struct sk_buff *skb;
+       int received = 0;
+       __skb_queue_head_init(&process_queue);
+       spin_lock(&queue->lock);
+       skb_queue_splice_tail_init(queue, &process_queue);
+       spin_unlock(&queue->lock);
+       while (received < budget && (skb = __skb_dequeue(&process_queue))) {
+               napi_gro_receive(napi, skb);
+               ++received;
+       }
+       if (!skb_queue_empty(&process_queue)) {
+               spin_lock(&queue->lock);
+               skb_queue_splice(&process_queue, queue);
+               spin_unlock(&queue->lock);
+       }
+       return received;
+ }
+ static int tun_napi_poll(struct napi_struct *napi, int budget)
+ {
+       unsigned int received;
+       received = tun_napi_receive(napi, budget);
+       if (received < budget)
+               napi_complete_done(napi, received);
+       return received;
+ }
+ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
+                         bool napi_en)
+ {
+       tfile->napi_enabled = napi_en;
+       if (napi_en) {
+               netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
+                              NAPI_POLL_WEIGHT);
+               napi_enable(&tfile->napi);
+               mutex_init(&tfile->napi_mutex);
+       }
+ }
+ static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile)
+ {
+       if (tfile->napi_enabled)
+               napi_disable(&tfile->napi);
+ }
+ static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile)
+ {
+       if (tfile->napi_enabled)
+               netif_napi_del(&tfile->napi);
+ }
+ static bool tun_napi_frags_enabled(const struct tun_struct *tun)
+ {
+       return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS;
+ }
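
The new tun helpers above wrap the usual NAPI lifecycle: netif_napi_add() plus napi_enable() when a queue is attached, napi_disable() and netif_napi_del() when it is torn down, and a ->poll() that calls napi_complete_done() once it processes less than the budget. A stripped-down sketch of that contract (struct priv and my_poll() are placeholders, not tun's names):

	#include <linux/netdevice.h>

	struct priv {
		struct napi_struct napi;
	};

	static int my_poll(struct napi_struct *napi, int budget)
	{
		int done = 0;	/* placeholder: pull up to 'budget' packets here */

		if (done < budget)
			napi_complete_done(napi, done);	/* backlog drained */
		return done;	/* returning 'budget' keeps the poll scheduled */
	}

	static void priv_napi_start(struct net_device *dev, struct priv *p)
	{
		netif_napi_add(dev, &p->napi, my_poll, NAPI_POLL_WEIGHT);
		napi_enable(&p->napi);		/* napi_schedule() now allowed */
	}

	static void priv_napi_stop(struct priv *p)
	{
		napi_disable(&p->napi);		/* waits for a running poll */
		netif_napi_del(&p->napi);
	}
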
  #ifdef CONFIG_TUN_VNET_CROSS_LE
  static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
  {
@@@ -380,25 -454,28 +454,28 @@@ static void tun_flow_cleanup(unsigned l
  
        tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
  
-       spin_lock_bh(&tun->lock);
+       spin_lock(&tun->lock);
        for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
                struct tun_flow_entry *e;
                struct hlist_node *n;
  
                hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
                        unsigned long this_timer;
-                       count++;
                        this_timer = e->updated + delay;
-                       if (time_before_eq(this_timer, jiffies))
+                       if (time_before_eq(this_timer, jiffies)) {
                                tun_flow_delete(tun, e);
-                       else if (time_before(this_timer, next_timer))
+                               continue;
+                       }
+                       count++;
+                       if (time_before(this_timer, next_timer))
                                next_timer = this_timer;
                }
        }
  
        if (count)
                mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
-       spin_unlock_bh(&tun->lock);
+       spin_unlock(&tun->lock);
  }
  
  static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
@@@ -469,7 -546,7 +546,7 @@@ static u16 tun_select_queue(struct net_
        u32 numqueues = 0;
  
        rcu_read_lock();
 -      numqueues = ACCESS_ONCE(tun->numqueues);
 +      numqueues = READ_ONCE(tun->numqueues);
  
        txq = __skb_get_hash_symmetric(skb);
        if (txq) {
@@@ -541,6 -618,11 +618,11 @@@ static void __tun_detach(struct tun_fil
  
        tun = rtnl_dereference(tfile->tun);
  
+       if (tun && clean) {
+               tun_napi_disable(tun, tfile);
+               tun_napi_del(tun, tfile);
+       }
        if (tun && !tfile->detached) {
                u16 index = tfile->queue_index;
                BUG_ON(index >= tun->numqueues);
@@@ -598,6 -680,7 +680,7 @@@ static void tun_detach_all(struct net_d
        for (i = 0; i < n; i++) {
                tfile = rtnl_dereference(tun->tfiles[i]);
                BUG_ON(!tfile);
+               tun_napi_disable(tun, tfile);
                tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
                tfile->socket.sk->sk_data_ready(tfile->socket.sk);
                RCU_INIT_POINTER(tfile->tun, NULL);
        synchronize_net();
        for (i = 0; i < n; i++) {
                tfile = rtnl_dereference(tun->tfiles[i]);
+               tun_napi_del(tun, tfile);
                /* Drop read queue */
                tun_queue_purge(tfile);
                sock_put(&tfile->sk);
                module_put(THIS_MODULE);
  }
  
- static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
+ static int tun_attach(struct tun_struct *tun, struct file *file,
+                     bool skip_filter, bool napi)
  {
        struct tun_file *tfile = file->private_data;
        struct net_device *dev = tun->dev;
        rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
        tun->numqueues++;
  
-       if (tfile->detached)
+       if (tfile->detached) {
                tun_enable_queue(tfile);
-       else
+       } else {
                sock_hold(&tfile->sk);
+               tun_napi_init(tun, tfile, napi);
+       }
  
        tun_set_real_num_queues(tun);
  
@@@ -692,7 -779,7 +779,7 @@@ out
        return err;
  }
  
- static struct tun_struct *__tun_get(struct tun_file *tfile)
+ static struct tun_struct *tun_get(struct tun_file *tfile)
  {
        struct tun_struct *tun;
  
        return tun;
  }
  
- static struct tun_struct *tun_get(struct file *file)
- {
-       return __tun_get(file->private_data);
- }
  static void tun_put(struct tun_struct *tun)
  {
        dev_put(tun->dev);
@@@ -864,7 -946,7 +946,7 @@@ static netdev_tx_t tun_net_xmit(struct 
  
        rcu_read_lock();
        tfile = rcu_dereference(tun->tfiles[txq]);
 -      numqueues = ACCESS_ONCE(tun->numqueues);
 +      numqueues = READ_ONCE(tun->numqueues);
  
        /* Drop packet if interface is not attached */
        if (txq >= numqueues)
@@@ -956,13 -1038,33 +1038,33 @@@ static void tun_poll_controller(struct 
         * Tun only receives frames when:
         * 1) the char device endpoint gets data from user space
         * 2) the tun socket gets a sendmsg call from user space
-        * Since both of those are synchronous operations, we are guaranteed
-        * never to have pending data when we poll for it
-        * so there is nothing to do here but return.
+        * If NAPI is not enabled, since both of those are synchronous
+        * operations, we are guaranteed never to have pending data when we poll
+        * for it so there is nothing to do here but return.
         * We need this though so netpoll recognizes us as an interface that
         * supports polling, which enables bridge devices in virt setups to
         * still use netconsole
+        * If NAPI is enabled, however, we need to schedule polling for all
+        * queues unless we are using napi_gro_frags(), which we call in
+        * process context and not in NAPI context.
         */
+       struct tun_struct *tun = netdev_priv(dev);
+       if (tun->flags & IFF_NAPI) {
+               struct tun_file *tfile;
+               int i;
+               if (tun_napi_frags_enabled(tun))
+                       return;
+               rcu_read_lock();
+               for (i = 0; i < tun->numqueues; i++) {
+                       tfile = rcu_dereference(tun->tfiles[i]);
+                       if (tfile->napi_enabled)
+                               napi_schedule(&tfile->napi);
+               }
+               rcu_read_unlock();
+       }
        return;
  }
  #endif
@@@ -1039,7 -1141,7 +1141,7 @@@ static u32 tun_xdp_query(struct net_dev
        return 0;
  }
  
- static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+ static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  {
        switch (xdp->command) {
        case XDP_SETUP_PROG:
@@@ -1083,7 -1185,7 +1185,7 @@@ static const struct net_device_ops tap_
        .ndo_features_check     = passthru_features_check,
        .ndo_set_rx_headroom    = tun_set_headroom,
        .ndo_get_stats64        = tun_net_get_stats64,
-       .ndo_xdp                = tun_xdp,
+       .ndo_bpf                = tun_xdp,
  };
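
The ->ndo_xdp to ->ndo_bpf rename above reflects that the callback now carries generic BPF commands in struct netdev_bpf rather than XDP-only ones. A hedged sketch of the handler shape after the rename; my_xdp_set() is a placeholder for a driver's attach routine, not part of any real API:

	#include <linux/netdevice.h>

	static int my_xdp_set(struct net_device *dev, struct bpf_prog *prog)
	{
		/* placeholder: swap the driver's program pointer here */
		return 0;
	}

	static int my_ndo_bpf(struct net_device *dev, struct netdev_bpf *bpf)
	{
		switch (bpf->command) {
		case XDP_SETUP_PROG:
			return my_xdp_set(dev, bpf->prog);
		default:
			return -EINVAL;
		}
	}

	/* wired up as  .ndo_bpf = my_ndo_bpf  in the net_device_ops */
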
  
  static void tun_flow_init(struct tun_struct *tun)
  
        tun->ageing_time = TUN_FLOW_EXPIRE;
        setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
-       mod_timer(&tun->flow_gc_timer,
-                 round_jiffies_up(jiffies + tun->ageing_time));
  }
  
  static void tun_flow_uninit(struct tun_struct *tun)
@@@ -1149,7 -1249,7 +1249,7 @@@ static void tun_net_init(struct net_dev
  static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
  {
        struct tun_file *tfile = file->private_data;
-       struct tun_struct *tun = __tun_get(tfile);
+       struct tun_struct *tun = tun_get(tfile);
        struct sock *sk;
        unsigned int mask = 0;
  
        return mask;
  }
  
+ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
+                                           size_t len,
+                                           const struct iov_iter *it)
+ {
+       struct sk_buff *skb;
+       size_t linear;
+       int err;
+       int i;
+       if (it->nr_segs > MAX_SKB_FRAGS + 1)
+               return ERR_PTR(-ENOMEM);
+       local_bh_disable();
+       skb = napi_get_frags(&tfile->napi);
+       local_bh_enable();
+       if (!skb)
+               return ERR_PTR(-ENOMEM);
+       linear = iov_iter_single_seg_count(it);
+       err = __skb_grow(skb, linear);
+       if (err)
+               goto free;
+       skb->len = len;
+       skb->data_len = len - linear;
+       skb->truesize += skb->data_len;
+       for (i = 1; i < it->nr_segs; i++) {
+               size_t fragsz = it->iov[i].iov_len;
+               unsigned long offset;
+               struct page *page;
+               void *data;
+               if (fragsz == 0 || fragsz > PAGE_SIZE) {
+                       err = -EINVAL;
+                       goto free;
+               }
+               local_bh_disable();
+               data = napi_alloc_frag(fragsz);
+               local_bh_enable();
+               if (!data) {
+                       err = -ENOMEM;
+                       goto free;
+               }
+               page = virt_to_head_page(data);
+               offset = data - page_address(page);
+               skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+       }
+       return skb;
+ free:
+       /* frees skb and all frags allocated with napi_alloc_frag() */
+       napi_free_frags(&tfile->napi);
+       return ERR_PTR(err);
+ }
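
tun_napi_alloc_frags() above builds a paged skb on top of napi_get_frags(), so the receive path can later hand it to the GRO engine with napi_gro_frags(). A rough sketch of that pairing for a single buffer, with error handling trimmed and the helper name made up:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	static int rx_one_buffer(struct napi_struct *napi, void *data, unsigned int len)
	{
		struct sk_buff *skb = napi_get_frags(napi);	/* cached on the napi */
		struct page *page;

		if (!skb)
			return -ENOMEM;

		page = virt_to_head_page(data);
		skb_fill_page_desc(skb, 0, page, data - page_address(page), len);
		skb->len += len;
		skb->data_len += len;
		skb->truesize += len;

		napi_gro_frags(napi);		/* consumes the skb cached above */
		return 0;
	}
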
  /* prepad is the amount to reserve at front.  len is length after that.
   * linear is a hint as to how much to copy (usually headers). */
  static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
@@@ -1315,6 -1473,7 +1473,7 @@@ static struct sk_buff *tun_build_skb(st
  
                xdp.data_hard_start = buf;
                xdp.data = buf + pad;
+               xdp_set_data_meta_invalid(&xdp);
                xdp.data_end = xdp.data + len;
                orig_data = xdp.data;
                act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@@ -1391,6 -1550,7 +1550,7 @@@ static ssize_t tun_get_user(struct tun_
        int err;
        u32 rxhash;
        int skb_xdp = 1;
+       bool frags = tun_napi_frags_enabled(tun);
  
        if (!(tun->dev->flags & IFF_UP))
                return -EIO;
                        zerocopy = true;
        }
  
-       if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+       if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
                /* For the packet that is not easy to be processed
                 * (e.g gso or jumbo packet), we will do it at after
                 * skb was created with generic XDP routine.
                                linear = tun16_to_cpu(tun, gso.hdr_len);
                }
  
-               skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+               if (frags) {
+                       mutex_lock(&tfile->napi_mutex);
+                       skb = tun_napi_alloc_frags(tfile, copylen, from);
+                       /* tun_napi_alloc_frags() enforces a layout for the skb.
+                        * If zerocopy is enabled, then this layout will be
+                        * overwritten by zerocopy_sg_from_iter().
+                        */
+                       zerocopy = false;
+               } else {
+                       skb = tun_alloc_skb(tfile, align, copylen, linear,
+                                           noblock);
+               }
                if (IS_ERR(skb)) {
                        if (PTR_ERR(skb) != -EAGAIN)
                                this_cpu_inc(tun->pcpu_stats->rx_dropped);
+                       if (frags)
+                               mutex_unlock(&tfile->napi_mutex);
                        return PTR_ERR(skb);
                }
  
                if (err) {
                        this_cpu_inc(tun->pcpu_stats->rx_dropped);
                        kfree_skb(skb);
+                       if (frags) {
+                               tfile->napi.skb = NULL;
+                               mutex_unlock(&tfile->napi_mutex);
+                       }
                        return -EFAULT;
                }
        }
        if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
                this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
                kfree_skb(skb);
+               if (frags) {
+                       tfile->napi.skb = NULL;
+                       mutex_unlock(&tfile->napi_mutex);
+               }
                return -EINVAL;
        }
  
                skb->dev = tun->dev;
                break;
        case IFF_TAP:
-               skb->protocol = eth_type_trans(skb, tun->dev);
+               if (!frags)
+                       skb->protocol = eth_type_trans(skb, tun->dev);
                break;
        }
  
        }
  
        rxhash = __skb_get_hash_symmetric(skb);
- #ifndef CONFIG_4KSTACKS
-       tun_rx_batched(tun, tfile, skb, more);
- #else
-       netif_rx_ni(skb);
- #endif
+       if (frags) {
+               /* Exercise flow dissector code path. */
+               u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb));
+               if (unlikely(headlen > skb_headlen(skb))) {
+                       this_cpu_inc(tun->pcpu_stats->rx_dropped);
+                       napi_free_frags(&tfile->napi);
+                       mutex_unlock(&tfile->napi_mutex);
+                       WARN_ON(1);
+                       return -ENOMEM;
+               }
+               local_bh_disable();
+               napi_gro_frags(&tfile->napi);
+               local_bh_enable();
+               mutex_unlock(&tfile->napi_mutex);
+       } else if (tfile->napi_enabled) {
+               struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+               int queue_len;
+               spin_lock_bh(&queue->lock);
+               __skb_queue_tail(queue, skb);
+               queue_len = skb_queue_len(queue);
+               spin_unlock(&queue->lock);
+               if (!more || queue_len > NAPI_POLL_WEIGHT)
+                       napi_schedule(&tfile->napi);
+               local_bh_enable();
+       } else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
+               tun_rx_batched(tun, tfile, skb, more);
+       } else {
+               netif_rx_ni(skb);
+       }
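
When IFF_NAPI is set without the frags mode, the path above queues the skb on sk_write_queue and only kicks NAPI once a batch has built up or 'more' is false; tun_napi_receive() then drains that queue from ->poll(). A condensed sketch of the producer side, with struct pq standing in for the tun file structure:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	struct pq {
		struct sk_buff_head q;
		struct napi_struct napi;
	};

	static void pq_rx(struct pq *p, struct sk_buff *skb, bool more)
	{
		int qlen;

		spin_lock_bh(&p->q.lock);
		__skb_queue_tail(&p->q, skb);		/* producer side */
		qlen = skb_queue_len(&p->q);
		spin_unlock_bh(&p->q.lock);

		if (!more || qlen > NAPI_POLL_WEIGHT)
			napi_schedule(&p->napi);	/* drained from ->poll() */
	}
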
  
        stats = get_cpu_ptr(tun->pcpu_stats);
        u64_stats_update_begin(&stats->syncp);
  static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
  {
        struct file *file = iocb->ki_filp;
-       struct tun_struct *tun = tun_get(file);
        struct tun_file *tfile = file->private_data;
+       struct tun_struct *tun = tun_get(tfile);
        ssize_t result;
  
        if (!tun)
@@@ -1757,7 -1972,7 +1972,7 @@@ static ssize_t tun_chr_read_iter(struc
  {
        struct file *file = iocb->ki_filp;
        struct tun_file *tfile = file->private_data;
-       struct tun_struct *tun = __tun_get(tfile);
+       struct tun_struct *tun = tun_get(tfile);
        ssize_t len = iov_iter_count(to), ret;
  
        if (!tun)
@@@ -1834,7 -2049,7 +2049,7 @@@ static int tun_sendmsg(struct socket *s
  {
        int ret;
        struct tun_file *tfile = container_of(sock, struct tun_file, socket);
-       struct tun_struct *tun = __tun_get(tfile);
+       struct tun_struct *tun = tun_get(tfile);
  
        if (!tun)
                return -EBADFD;
@@@ -1850,7 -2065,7 +2065,7 @@@ static int tun_recvmsg(struct socket *s
                       int flags)
  {
        struct tun_file *tfile = container_of(sock, struct tun_file, socket);
-       struct tun_struct *tun = __tun_get(tfile);
+       struct tun_struct *tun = tun_get(tfile);
        int ret;
  
        if (!tun)
@@@ -1882,7 -2097,7 +2097,7 @@@ static int tun_peek_len(struct socket *
        struct tun_struct *tun;
        int ret = 0;
  
-       tun = __tun_get(tfile);
+       tun = tun_get(tfile);
        if (!tun)
                return 0;
  
@@@ -1962,6 -2177,15 +2177,15 @@@ static int tun_set_iff(struct net *net
        if (tfile->detached)
                return -EINVAL;
  
+       if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) {
+               if (!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+               if (!(ifr->ifr_flags & IFF_NAPI) ||
+                   (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP)
+                       return -EINVAL;
+       }
        dev = __dev_get_by_name(net, ifr->ifr_name);
        if (dev) {
                if (ifr->ifr_flags & IFF_TUN_EXCL)
                if (err < 0)
                        return err;
  
-               err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER);
+               err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
+                                ifr->ifr_flags & IFF_NAPI);
                if (err < 0)
                        return err;
  
                                       NETIF_F_HW_VLAN_STAG_TX);
  
                INIT_LIST_HEAD(&tun->disabled);
-               err = tun_attach(tun, file, false);
+               err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
                if (err < 0)
                        goto err_free_flow;
  
@@@ -2222,7 -2447,7 +2447,7 @@@ static int tun_set_queue(struct file *f
                ret = security_tun_dev_attach_queue(tun->security);
                if (ret < 0)
                        goto unlock;
-               ret = tun_attach(tun, file, false);
+               ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI);
        } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
                tun = rtnl_dereference(tfile->tun);
                if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
@@@ -2271,7 -2496,7 +2496,7 @@@ static long __tun_chr_ioctl(struct fil
        ret = 0;
        rtnl_lock();
  
-       tun = __tun_get(tfile);
+       tun = tun_get(tfile);
        if (cmd == TUNSETIFF) {
                ret = -EEXIST;
                if (tun)
@@@ -2622,15 -2847,16 +2847,16 @@@ static int tun_chr_close(struct inode *
  }
  
  #ifdef CONFIG_PROC_FS
- static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f)
+ static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
  {
+       struct tun_file *tfile = file->private_data;
        struct tun_struct *tun;
        struct ifreq ifr;
  
        memset(&ifr, 0, sizeof(ifr));
  
        rtnl_lock();
-       tun = tun_get(f);
+       tun = tun_get(tfile);
        if (tun)
                tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
        rtnl_unlock();
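
From user space the new modes are requested through the same TUNSETIFF ioctl, with IFF_NAPI and IFF_NAPI_FRAGS OR'd into ifr_flags; as the tun_set_iff() hunk shows, IFF_NAPI_FRAGS additionally requires CAP_NET_ADMIN, IFF_NAPI and a TAP device. A hedged user-space sketch, where "tap0" is an arbitrary name:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/if.h>
	#include <linux/if_tun.h>

	static int open_napi_tap(void)
	{
		struct ifreq ifr = { 0 };
		int fd = open("/dev/net/tun", O_RDWR);

		if (fd < 0)
			return -1;

		ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS;
		strncpy(ifr.ifr_name, "tap0", IFNAMSIZ - 1);
		if (ioctl(fd, TUNSETIFF, &ifr) < 0) {	/* -EPERM without CAP_NET_ADMIN */
			close(fd);
			return -1;
		}
		return fd;
	}
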
diff --combined drivers/net/vxlan.c
index 3247d2feda07f8a671fe32aa45225cfbd34c50ff,c437707a8549ea02c38c2d4a6646470154e8760c..7ac487031b4bca89b13f6c6fa5312651e1901661
@@@ -1623,26 -1623,19 +1623,19 @@@ static struct sk_buff *vxlan_na_create(
  static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
  {
        struct vxlan_dev *vxlan = netdev_priv(dev);
-       struct nd_msg *msg;
-       const struct ipv6hdr *iphdr;
        const struct in6_addr *daddr;
-       struct neighbour *n;
+       const struct ipv6hdr *iphdr;
        struct inet6_dev *in6_dev;
+       struct neighbour *n;
+       struct nd_msg *msg;
  
        in6_dev = __in6_dev_get(dev);
        if (!in6_dev)
                goto out;
  
-       if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
-               goto out;
        iphdr = ipv6_hdr(skb);
        daddr = &iphdr->daddr;
        msg = (struct nd_msg *)(iphdr + 1);
-       if (msg->icmph.icmp6_code != 0 ||
-           msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
-               goto out;
  
        if (ipv6_addr_loopback(daddr) ||
            ipv6_addr_is_multicast(&msg->target))
@@@ -2240,11 -2233,11 +2233,11 @@@ tx_error
  static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
  {
        struct vxlan_dev *vxlan = netdev_priv(dev);
+       struct vxlan_rdst *rdst, *fdst = NULL;
        const struct ip_tunnel_info *info;
-       struct ethhdr *eth;
        bool did_rsc = false;
-       struct vxlan_rdst *rdst, *fdst = NULL;
        struct vxlan_fdb *f;
+       struct ethhdr *eth;
        __be32 vni = 0;
  
        info = skb_tunnel_info(skb);
                if (ntohs(eth->h_proto) == ETH_P_ARP)
                        return arp_reduce(dev, skb, vni);
  #if IS_ENABLED(CONFIG_IPV6)
-               else if (ntohs(eth->h_proto) == ETH_P_IPV6) {
-                       struct ipv6hdr *hdr, _hdr;
-                       if ((hdr = skb_header_pointer(skb,
-                                                     skb_network_offset(skb),
-                                                     sizeof(_hdr), &_hdr)) &&
-                           hdr->nexthdr == IPPROTO_ICMPV6)
+               else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
+                        pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+                                           sizeof(struct nd_msg)) &&
+                        ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+                       struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1);
+                       if (m->icmph.icmp6_code == 0 &&
+                           m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
                                return neigh_reduce(dev, skb, vni);
                }
  #endif
  }
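
The vxlan change above moves the header validation into the transmit path, so neigh_reduce() only ever sees a packet whose IPv6 and neighbour-discovery headers are already linear. The rule it follows is to call pskb_may_pull() for everything about to be dereferenced before touching ipv6_hdr(). A compact sketch of that check (the helper name is made up):

	#include <net/ndisc.h>
	#include <linux/ipv6.h>

	static bool is_ipv6_neigh_solicit(struct sk_buff *skb)
	{
		const struct nd_msg *msg;

		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
			return false;		/* headers not linear: don't parse */
		if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
			return false;

		msg = (const struct nd_msg *)(ipv6_hdr(skb) + 1);
		return msg->icmph.icmp6_code == 0 &&
		       msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION;
	}
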
  
  /* Walk the forwarding table and purge stale entries */
 -static void vxlan_cleanup(unsigned long arg)
 +static void vxlan_cleanup(struct timer_list *t)
  {
 -      struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
 +      struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer);
        unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
        unsigned int h;
  
@@@ -2647,7 -2642,9 +2642,7 @@@ static void vxlan_setup(struct net_devi
        INIT_LIST_HEAD(&vxlan->next);
        spin_lock_init(&vxlan->hash_lock);
  
 -      init_timer_deferrable(&vxlan->age_timer);
 -      vxlan->age_timer.function = vxlan_cleanup;
 -      vxlan->age_timer.data = (unsigned long) vxlan;
 +      timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
  
        vxlan->dev = dev;
  
@@@ -3702,6 -3699,7 +3697,7 @@@ static void __net_exit vxlan_exit_net(s
        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
        struct vxlan_dev *vxlan, *next;
        struct net_device *dev, *aux;
+       unsigned int h;
        LIST_HEAD(list);
  
        rtnl_lock();
  
        unregister_netdevice_many(&list);
        rtnl_unlock();
+       for (h = 0; h < PORT_HASH_SIZE; ++h)
+               WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
  }
  
  static struct pernet_operations vxlan_net_ops = {
index 785a0f33b7e66ece24efc4f5b9e97904a2b07957,b2256aa76eb6af2780eaf3b7bd5864262f40806f..e3495ea95553fb2d2056421d5ff0845b8f26e83a
@@@ -260,10 -260,11 +260,11 @@@ struct rte_console 
  #define I_HMB_HOST_INT        I_HMB_SW3       /* Miscellaneous Interrupt */
  
  /* tohostmailboxdata */
- #define HMB_DATA_NAKHANDLED   1       /* retransmit NAK'd frame */
- #define HMB_DATA_DEVREADY     2       /* talk to host after enable */
- #define HMB_DATA_FC           4       /* per prio flowcontrol update flag */
- #define HMB_DATA_FWREADY      8       /* fw ready for protocol activity */
+ #define HMB_DATA_NAKHANDLED   0x0001  /* retransmit NAK'd frame */
+ #define HMB_DATA_DEVREADY     0x0002  /* talk to host after enable */
+ #define HMB_DATA_FC           0x0004  /* per prio flowcontrol update flag */
+ #define HMB_DATA_FWREADY      0x0008  /* fw ready for protocol activity */
+ #define HMB_DATA_FWHALT               0x0010  /* firmware halted */
  
  #define HMB_DATA_FCDATA_MASK  0xff000000
  #define HMB_DATA_FCDATA_SHIFT 24
@@@ -1094,6 -1095,10 +1095,10 @@@ static u32 brcmf_sdio_hostmail(struct b
                          offsetof(struct sdpcmd_regs, tosbmailbox));
        bus->sdcnt.f1regdata += 2;
  
+       /* dongle indicates the firmware has halted/crashed */
+       if (hmb_data & HMB_DATA_FWHALT)
+               brcmf_err("mailbox indicates firmware halted\n");
        /* Dongle recomposed rx frames, accept them again */
        if (hmb_data & HMB_DATA_NAKHANDLED) {
                brcmf_dbg(SDIO, "Dongle reports NAK handled, expect rtx of %d\n",
                         HMB_DATA_NAKHANDLED |
                         HMB_DATA_FC |
                         HMB_DATA_FWREADY |
+                        HMB_DATA_FWHALT |
                         HMB_DATA_FCDATA_MASK | HMB_DATA_VERSION_MASK))
                brcmf_err("Unknown mailbox data content: 0x%02x\n",
                          hmb_data);
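
The brcmfmac defines above switch the to-host mailbox flags to explicit hex bit values and add HMB_DATA_FWHALT, which the handler now reports when the dongle signals a firmware crash. A small sketch of decoding such a word with those masks, with the real handling replaced by prints:

	static void decode_hmb_data(u32 hmb_data)
	{
		if (hmb_data & HMB_DATA_FWHALT)
			pr_err("firmware halted\n");		/* new flag */
		if (hmb_data & HMB_DATA_NAKHANDLED)
			pr_debug("dongle handled a NAK\n");
		if (hmb_data & HMB_DATA_FC)
			pr_debug("flow control update: 0x%02x\n",
				 (hmb_data & HMB_DATA_FCDATA_MASK) >>
				 HMB_DATA_FCDATA_SHIFT);
	}
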
@@@ -3628,7 -3634,7 +3634,7 @@@ static void brcmf_sdio_dataworker(struc
  
        bus->dpc_running = true;
        wmb();
 -      while (ACCESS_ONCE(bus->dpc_triggered)) {
 +      while (READ_ONCE(bus->dpc_triggered)) {
                bus->dpc_triggered = false;
                brcmf_sdio_dpc(bus);
                bus->idlecount = 0;
@@@ -3979,6 -3985,24 +3985,24 @@@ brcmf_sdio_watchdog(unsigned long data
        }
  }
  
+ static int brcmf_sdio_get_fwname(struct device *dev, u32 chip, u32 chiprev,
+                                u8 *fw_name)
+ {
+       struct brcmf_bus *bus_if = dev_get_drvdata(dev);
+       struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
+       int ret = 0;
+       if (sdiodev->fw_name[0] != '\0')
+               strlcpy(fw_name, sdiodev->fw_name, BRCMF_FW_NAME_LEN);
+       else
+               ret = brcmf_fw_map_chip_to_name(chip, chiprev,
+                                               brcmf_sdio_fwnames,
+                                               ARRAY_SIZE(brcmf_sdio_fwnames),
+                                               fw_name, NULL);
+       return ret;
+ }
  static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
        .stop = brcmf_sdio_bus_stop,
        .preinit = brcmf_sdio_bus_preinit,
        .wowl_config = brcmf_sdio_wowl_config,
        .get_ramsize = brcmf_sdio_bus_get_ramsize,
        .get_memdump = brcmf_sdio_bus_get_memdump,
+       .get_fwname = brcmf_sdio_get_fwname,
  };
  
  static void brcmf_sdio_firmware_callback(struct device *dev, int err,
@@@ -4144,10 -4169,8 +4169,8 @@@ struct brcmf_sdio *brcmf_sdio_probe(str
        init_waitqueue_head(&bus->dcmd_resp_wait);
  
        /* Set up the watchdog timer */
-       init_timer(&bus->timer);
-       bus->timer.data = (unsigned long)bus;
-       bus->timer.function = brcmf_sdio_watchdog;
+       setup_timer(&bus->timer, brcmf_sdio_watchdog,
+                   (unsigned long)bus);
        /* Initialize watchdog thread */
        init_completion(&bus->watchdog_wait);
        bus->watchdog_tsk = kthread_run(brcmf_sdio_watchdog_thread,
index 0f45f34e39d3d549b78743d4683ef715296c7918,ce718e9c63ec17d66ab6315196f0eaef1471fa47..7078b7e458be84d59e691e88bf791bce9ca9154d
@@@ -86,6 -86,7 +86,7 @@@
  #include "time-event.h"
  #include "fw-api.h"
  #include "fw/api/scan.h"
+ #include "fw/acpi.h"
  
  #define DRV_DESCRIPTION       "The new Intel(R) wireless AGN driver for Linux"
  MODULE_DESCRIPTION(DRV_DESCRIPTION);
@@@ -423,8 -424,6 +424,6 @@@ static const struct iwl_hcmd_names iwl_
   * Access is done through binary search
   */
  static const struct iwl_hcmd_names iwl_mvm_mac_conf_names[] = {
-       HCMD_NAME(LINK_QUALITY_MEASUREMENT_CMD),
-       HCMD_NAME(LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF),
        HCMD_NAME(CHANNEL_SWITCH_NOA_NOTIF),
  };
  
@@@ -490,18 -489,21 +489,21 @@@ static const struct iwl_hcmd_arr iwl_mv
  static void iwl_mvm_async_handlers_wk(struct work_struct *wk);
  static void iwl_mvm_d0i3_exit_work(struct work_struct *wk);
  
- static u32 calc_min_backoff(struct iwl_trans *trans, const struct iwl_cfg *cfg)
+ static u32 iwl_mvm_min_backoff(struct iwl_mvm *mvm)
  {
-       const struct iwl_pwr_tx_backoff *pwr_tx_backoff = cfg->pwr_tx_backoffs;
+       const struct iwl_pwr_tx_backoff *backoff = mvm->cfg->pwr_tx_backoffs;
+       u64 dflt_pwr_limit;
  
-       if (!pwr_tx_backoff)
+       if (!backoff)
                return 0;
  
-       while (pwr_tx_backoff->pwr) {
-               if (trans->dflt_pwr_limit >= pwr_tx_backoff->pwr)
-                       return pwr_tx_backoff->backoff;
+       dflt_pwr_limit = iwl_acpi_get_pwr_limit(mvm->dev);
  
-               pwr_tx_backoff++;
+       while (backoff->pwr) {
+               if (dflt_pwr_limit >= backoff->pwr)
+                       return backoff->backoff;
+               backoff++;
        }
  
        return 0;
@@@ -701,7 -703,6 +703,6 @@@ iwl_op_mode_mvm_start(struct iwl_trans 
        trans_cfg.cb_data_offs = offsetof(struct ieee80211_tx_info,
                                          driver_data[2]);
  
-       trans_cfg.sdio_adma_addr = fw->sdio_adma_addr;
        trans_cfg.sw_csum_tx = IWL_MVM_SW_TX_CSUM_OFFLOAD;
  
        /* Set a short watchdog for the command queue */
                goto out_free;
        mvm->hw_registered = true;
  
-       min_backoff = calc_min_backoff(trans, cfg);
+       min_backoff = iwl_mvm_min_backoff(mvm);
        iwl_mvm_thermal_initialize(mvm, min_backoff);
  
        err = iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
@@@ -1118,7 -1119,7 +1119,7 @@@ void iwl_mvm_set_hw_ctkill_state(struc
  static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state)
  {
        struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
 -      bool calibrating = ACCESS_ONCE(mvm->calibrating);
 +      bool calibrating = READ_ONCE(mvm->calibrating);
  
        if (state)
                set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
index 6e9d3289b9d0ef68ccb40603f8601b3552abeab9,d88c3685a6ddc61e13cfa14c58d66919abed4f55..593b7f97b29c103f8faf28dd905dd36aed34e763
@@@ -652,7 -652,7 +652,7 @@@ int iwl_mvm_tx_skb_non_sta(struct iwl_m
                                return -1;
                } else if (info.control.vif->type == NL80211_IFTYPE_STATION &&
                           is_multicast_ether_addr(hdr->addr1)) {
 -                      u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id);
 +                      u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id);
  
                        if (ap_sta_id != IWL_MVM_INVALID_STA)
                                sta_id = ap_sta_id;
@@@ -700,7 -700,7 +700,7 @@@ static int iwl_mvm_tx_tso(struct iwl_mv
        snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) +
                tcp_hdrlen(skb);
  
 -      dbg_max_amsdu_len = ACCESS_ONCE(mvm->max_amsdu_len);
 +      dbg_max_amsdu_len = READ_ONCE(mvm->max_amsdu_len);
  
        if (!sta->max_amsdu_len ||
            !ieee80211_is_data_qos(hdr->frame_control) ||
@@@ -1594,8 -1594,7 +1594,7 @@@ static void iwl_mvm_rx_tx_cmd_agg(struc
                mvmsta->tid_data[tid].tx_time =
                        le16_to_cpu(tx_resp->wireless_media_time);
                mvmsta->tid_data[tid].lq_color =
-                       (tx_resp->tlc_info & TX_RES_RATE_TABLE_COLOR_MSK) >>
-                       TX_RES_RATE_TABLE_COLOR_POS;
+                       TX_RES_RATE_TABLE_COL_GET(tx_resp->tlc_info);
        }
  
        rcu_read_unlock();
@@@ -1746,6 -1745,7 +1745,7 @@@ void iwl_mvm_rx_ba_notif(struct iwl_mv
        if (iwl_mvm_has_new_tx_api(mvm)) {
                struct iwl_mvm_compressed_ba_notif *ba_res =
                        (void *)pkt->data;
+               u8 lq_color = TX_RES_RATE_TABLE_COL_GET(ba_res->tlc_rate_info);
                int i;
  
                sta_id = ba_res->sta_id;
                if (!le16_to_cpu(ba_res->tfd_cnt))
                        goto out;
  
+               rcu_read_lock();
+               mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id);
+               if (!mvmsta)
+                       goto out_unlock;
                /* Free per TID */
                for (i = 0; i < le16_to_cpu(ba_res->tfd_cnt); i++) {
                        struct iwl_mvm_compressed_ba_tfd *ba_tfd =
                                &ba_res->tfd[i];
  
+                       mvmsta->tid_data[i].lq_color = lq_color;
                        iwl_mvm_tx_reclaim(mvm, sta_id, ba_tfd->tid,
                                           (int)(le16_to_cpu(ba_tfd->q_num)),
                                           le16_to_cpu(ba_tfd->tfd_index),
                                           le32_to_cpu(ba_res->tx_rate));
                }
  
+ out_unlock:
+               rcu_read_unlock();
  out:
                IWL_DEBUG_TX_REPLY(mvm,
                                   "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n",
index 9ad3f4fe589417ed752e397554575fdc4553fb0e,8d992d5ba0644935f31497d6c4aa92af1f852682..b7a51603465b20752616639cd3f663a6844dcdd8
  #define IWL_FW_MEM_EXTENDED_START     0x40000
  #define IWL_FW_MEM_EXTENDED_END               0x57FFF
  
+ static void iwl_trans_pcie_dump_regs(struct iwl_trans *trans)
+ {
+ #define PCI_DUMP_SIZE 64
+ #define PREFIX_LEN    32
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct pci_dev *pdev = trans_pcie->pci_dev;
+       u32 i, pos, alloc_size, *ptr, *buf;
+       char *prefix;
+       if (trans_pcie->pcie_dbg_dumped_once)
+               return;
+       /* Should be a multiple of 4 */
+       BUILD_BUG_ON(PCI_DUMP_SIZE > 4096 || PCI_DUMP_SIZE & 0x3);
+       /* Alloc a max size buffer */
+       if (PCI_ERR_ROOT_ERR_SRC +  4 > PCI_DUMP_SIZE)
+               alloc_size = PCI_ERR_ROOT_ERR_SRC +  4 + PREFIX_LEN;
+       else
+               alloc_size = PCI_DUMP_SIZE + PREFIX_LEN;
+       buf = kmalloc(alloc_size, GFP_ATOMIC);
+       if (!buf)
+               return;
+       prefix = (char *)buf + alloc_size - PREFIX_LEN;
+       IWL_ERR(trans, "iwlwifi transaction failed, dumping registers\n");
+       /* Print wifi device registers */
+       sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+       IWL_ERR(trans, "iwlwifi device config registers:\n");
+       for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+               if (pci_read_config_dword(pdev, i, ptr))
+                       goto err_read;
+       print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+       IWL_ERR(trans, "iwlwifi device memory mapped registers:\n");
+       for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+               *ptr = iwl_read32(trans, i);
+       print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+       if (pos) {
+               IWL_ERR(trans, "iwlwifi device AER capability structure:\n");
+               for (i = 0, ptr = buf; i < PCI_ERR_ROOT_COMMAND; i += 4, ptr++)
+                       if (pci_read_config_dword(pdev, pos + i, ptr))
+                               goto err_read;
+               print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
+                              32, 4, buf, i, 0);
+       }
+       /* Print parent device registers next */
+       if (!pdev->bus->self)
+               goto out;
+       pdev = pdev->bus->self;
+       sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+       IWL_ERR(trans, "iwlwifi parent port (%s) config registers:\n",
+               pci_name(pdev));
+       for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+               if (pci_read_config_dword(pdev, i, ptr))
+                       goto err_read;
+       print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+       /* Print root port AER registers */
+       pos = 0;
+       pdev = pcie_find_root_port(pdev);
+       if (pdev)
+               pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+       if (pos) {
+               IWL_ERR(trans, "iwlwifi root port (%s) AER cap structure:\n",
+                       pci_name(pdev));
+               sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+               for (i = 0, ptr = buf; i <= PCI_ERR_ROOT_ERR_SRC; i += 4, ptr++)
+                       if (pci_read_config_dword(pdev, pos + i, ptr))
+                               goto err_read;
+               print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32,
+                              4, buf, i, 0);
+       }
+ err_read:
+       print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+       IWL_ERR(trans, "Read failed at 0x%X\n", i);
+ out:
+       trans_pcie->pcie_dbg_dumped_once = 1;
+       kfree(buf);
+ }
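
iwl_trans_pcie_dump_regs() above walks PCI config space with pci_read_config_dword() and prints it via print_hex_dump(), once per device lifetime, whenever a transaction fails. A reduced sketch of that dump pattern for a fixed 64-byte window (my_dump_cfg() is illustrative, not the driver's function):

	#include <linux/pci.h>
	#include <linux/printk.h>

	static void my_dump_cfg(struct pci_dev *pdev)
	{
		u32 buf[16];	/* first 64 bytes of config space */
		int i;

		for (i = 0; i < ARRAY_SIZE(buf); i++)
			if (pci_read_config_dword(pdev, i * 4, &buf[i]))
				return;		/* read failed: give up quietly */

		print_hex_dump(KERN_ERR, "cfg: ", DUMP_PREFIX_OFFSET,
			       32, 4, buf, sizeof(buf), false);
	}
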
  static void iwl_pcie_free_fw_monitor(struct iwl_trans *trans)
  {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@@ -649,6 -736,7 +736,7 @@@ static int iwl_pcie_load_firmware_chunk
                                 trans_pcie->ucode_write_complete, 5 * HZ);
        if (!ret) {
                IWL_ERR(trans, "Failed to load firmware chunk!\n");
+               iwl_trans_pcie_dump_regs(trans);
                return -ETIMEDOUT;
        }
  
@@@ -1868,6 -1956,7 +1956,7 @@@ static bool iwl_trans_pcie_grab_nic_acc
                           (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
                            CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
        if (unlikely(ret < 0)) {
+               iwl_trans_pcie_dump_regs(trans);
                iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);
                WARN_ONCE(1,
                          "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
@@@ -2076,12 -2165,12 +2165,12 @@@ static int iwl_trans_pcie_wait_txq_empt
  
        IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", txq_idx);
        txq = trans_pcie->txq[txq_idx];
 -      wr_ptr = ACCESS_ONCE(txq->write_ptr);
 +      wr_ptr = READ_ONCE(txq->write_ptr);
  
 -      while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) &&
 +      while (txq->read_ptr != READ_ONCE(txq->write_ptr) &&
               !time_after(jiffies,
                           now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) {
 -              u8 write_ptr = ACCESS_ONCE(txq->write_ptr);
 +              u8 write_ptr = READ_ONCE(txq->write_ptr);
  
                if (WARN_ONCE(wr_ptr != write_ptr,
                              "WR pointer moved while flushing %d -> %d\n",
@@@ -2553,7 -2642,7 +2642,7 @@@ static u32 iwl_trans_pcie_dump_rbs(stru
  
        spin_lock(&rxq->lock);
  
 -      r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF;
 +      r = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF;
  
        for (i = rxq->read, j = 0;
             i != r && j < allocated_rb_nums;
@@@ -2814,7 -2903,7 +2903,7 @@@ static struct iwl_trans_dump_dat
                /* Dump RBs is supported only for pre-9000 devices (1 queue) */
                struct iwl_rxq *rxq = &trans_pcie->rxq[0];
                /* RBs */
 -              num_rbs = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num))
 +              num_rbs = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num))
                                      & 0x0FFF;
                num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK;
                len += num_rbs * (sizeof(*data) +
@@@ -2932,6 -3021,7 +3021,7 @@@ static void iwl_trans_pcie_resume(struc
        .ref = iwl_trans_pcie_ref,                                      \
        .unref = iwl_trans_pcie_unref,                                  \
        .dump_data = iwl_trans_pcie_dump_data,                          \
+       .dump_regs = iwl_trans_pcie_dump_regs,                          \
        .d3_suspend = iwl_trans_pcie_d3_suspend,                        \
        .d3_resume = iwl_trans_pcie_d3_resume
  
index d2b3d6177a556c39530a88666afebf01d996507a,ec2f4c31425a13d633a5a532fa1c51e2918505b1..07a49f58070aa061b763c8723bab99d335a9fab7
@@@ -396,7 -396,7 +396,7 @@@ static int mac80211_hwsim_vendor_cmd_te
        if (!tb[QCA_WLAN_VENDOR_ATTR_TEST])
                return -EINVAL;
        val = nla_get_u32(tb[QCA_WLAN_VENDOR_ATTR_TEST]);
-       wiphy_debug(wiphy, "%s: test=%u\n", __func__, val);
+       wiphy_dbg(wiphy, "%s: test=%u\n", __func__, val);
  
        /* Send a vendor event as a test. Note that this would not normally be
         * done within a command handler, but rather, based on some other
@@@ -643,9 -643,9 +643,9 @@@ static void hwsim_send_ps_poll(void *da
        if (!vp->assoc)
                return;
  
-       wiphy_debug(data->hw->wiphy,
-                   "%s: send PS-Poll to %pM for aid %d\n",
-                   __func__, vp->bssid, vp->aid);
+       wiphy_dbg(data->hw->wiphy,
+                 "%s: send PS-Poll to %pM for aid %d\n",
+                 __func__, vp->bssid, vp->aid);
  
        skb = dev_alloc_skb(sizeof(*pspoll));
        if (!skb)
@@@ -674,9 -674,9 +674,9 @@@ static void hwsim_send_nullfunc(struct 
        if (!vp->assoc)
                return;
  
-       wiphy_debug(data->hw->wiphy,
-                   "%s: send data::nullfunc to %pM ps=%d\n",
-                   __func__, vp->bssid, ps);
+       wiphy_dbg(data->hw->wiphy,
+                 "%s: send data::nullfunc to %pM ps=%d\n",
+                 __func__, vp->bssid, ps);
  
        skb = dev_alloc_skb(sizeof(*hdr));
        if (!skb)
@@@ -1034,7 -1034,7 +1034,7 @@@ static void mac80211_hwsim_tx_frame_nl(
        msg_head = genlmsg_put(skb, 0, 0, &hwsim_genl_family, 0,
                               HWSIM_CMD_FRAME);
        if (msg_head == NULL) {
-               printk(KERN_DEBUG "mac80211_hwsim: problem with msg_head\n");
+               pr_debug("mac80211_hwsim: problem with msg_head\n");
                goto nla_put_failure;
        }
  
  nla_put_failure:
        nlmsg_free(skb);
  err_free_txskb:
-       printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+       pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
        ieee80211_free_txskb(hw, my_skb);
        data->tx_failed++;
  }
@@@ -1347,7 -1347,7 +1347,7 @@@ static void mac80211_hwsim_tx(struct ie
        }
  
        if (data->idle && !data->tmp_chan) {
-               wiphy_debug(hw->wiphy, "Trying to TX when idle - reject\n");
+               wiphy_dbg(hw->wiphy, "Trying to TX when idle - reject\n");
                ieee80211_free_txskb(hw, skb);
                return;
        }
        mac80211_hwsim_monitor_rx(hw, skb, channel);
  
        /* wmediumd mode check */
 -      _portid = ACCESS_ONCE(data->wmediumd);
 +      _portid = READ_ONCE(data->wmediumd);
  
        if (_portid)
                return mac80211_hwsim_tx_frame_nl(hw, skb, _portid);
  static int mac80211_hwsim_start(struct ieee80211_hw *hw)
  {
        struct mac80211_hwsim_data *data = hw->priv;
-       wiphy_debug(hw->wiphy, "%s\n", __func__);
+       wiphy_dbg(hw->wiphy, "%s\n", __func__);
        data->started = true;
        return 0;
  }
@@@ -1419,16 -1419,16 +1419,16 @@@ static void mac80211_hwsim_stop(struct 
        struct mac80211_hwsim_data *data = hw->priv;
        data->started = false;
        tasklet_hrtimer_cancel(&data->beacon_timer);
-       wiphy_debug(hw->wiphy, "%s\n", __func__);
+       wiphy_dbg(hw->wiphy, "%s\n", __func__);
  }
  
  
  static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw,
                                        struct ieee80211_vif *vif)
  {
-       wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
-                   __func__, ieee80211_vif_type_p2p(vif),
-                   vif->addr);
+       wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
+                 __func__, ieee80211_vif_type_p2p(vif),
+                 vif->addr);
        hwsim_set_magic(vif);
  
        vif->cab_queue = 0;
@@@ -1447,9 -1447,9 +1447,9 @@@ static int mac80211_hwsim_change_interf
                                           bool newp2p)
  {
        newtype = ieee80211_iftype_p2p(newtype, newp2p);
-       wiphy_debug(hw->wiphy,
-                   "%s (old type=%d, new type=%d, mac_addr=%pM)\n",
-                   __func__, ieee80211_vif_type_p2p(vif),
+       wiphy_dbg(hw->wiphy,
+                 "%s (old type=%d, new type=%d, mac_addr=%pM)\n",
+                 __func__, ieee80211_vif_type_p2p(vif),
                    newtype, vif->addr);
        hwsim_check_magic(vif);
  
  static void mac80211_hwsim_remove_interface(
        struct ieee80211_hw *hw, struct ieee80211_vif *vif)
  {
-       wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
-                   __func__, ieee80211_vif_type_p2p(vif),
-                   vif->addr);
+       wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
+                 __func__, ieee80211_vif_type_p2p(vif),
+                 vif->addr);
        hwsim_check_magic(vif);
        hwsim_clear_magic(vif);
  }
@@@ -1477,7 -1477,7 +1477,7 @@@ static void mac80211_hwsim_tx_frame(str
                                    struct ieee80211_channel *chan)
  {
        struct mac80211_hwsim_data *data = hw->priv;
 -      u32 _pid = ACCESS_ONCE(data->wmediumd);
 +      u32 _pid = READ_ONCE(data->wmediumd);
  
        if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) {
                struct ieee80211_tx_info *txi = IEEE80211_SKB_CB(skb);
@@@ -1589,23 -1589,23 +1589,23 @@@ static int mac80211_hwsim_config(struc
        int idx;
  
        if (conf->chandef.chan)
-               wiphy_debug(hw->wiphy,
-                           "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n",
-                           __func__,
-                           conf->chandef.chan->center_freq,
-                           conf->chandef.center_freq1,
-                           conf->chandef.center_freq2,
-                           hwsim_chanwidths[conf->chandef.width],
-                           !!(conf->flags & IEEE80211_CONF_IDLE),
-                           !!(conf->flags & IEEE80211_CONF_PS),
-                           smps_modes[conf->smps_mode]);
+               wiphy_dbg(hw->wiphy,
+                         "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n",
+                         __func__,
+                         conf->chandef.chan->center_freq,
+                         conf->chandef.center_freq1,
+                         conf->chandef.center_freq2,
+                         hwsim_chanwidths[conf->chandef.width],
+                         !!(conf->flags & IEEE80211_CONF_IDLE),
+                         !!(conf->flags & IEEE80211_CONF_PS),
+                         smps_modes[conf->smps_mode]);
        else
-               wiphy_debug(hw->wiphy,
-                           "%s (freq=0 idle=%d ps=%d smps=%s)\n",
-                           __func__,
-                           !!(conf->flags & IEEE80211_CONF_IDLE),
-                           !!(conf->flags & IEEE80211_CONF_PS),
-                           smps_modes[conf->smps_mode]);
+               wiphy_dbg(hw->wiphy,
+                         "%s (freq=0 idle=%d ps=%d smps=%s)\n",
+                         __func__,
+                         !!(conf->flags & IEEE80211_CONF_IDLE),
+                         !!(conf->flags & IEEE80211_CONF_PS),
+                         smps_modes[conf->smps_mode]);
  
        data->idle = !!(conf->flags & IEEE80211_CONF_IDLE);
  
@@@ -1659,7 -1659,7 +1659,7 @@@ static void mac80211_hwsim_configure_fi
  {
        struct mac80211_hwsim_data *data = hw->priv;
  
-       wiphy_debug(hw->wiphy, "%s\n", __func__);
+       wiphy_dbg(hw->wiphy, "%s\n", __func__);
  
        data->rx_filter = 0;
        if (*total_flags & FIF_ALLMULTI)
@@@ -1688,25 -1688,25 +1688,25 @@@ static void mac80211_hwsim_bss_info_cha
  
        hwsim_check_magic(vif);
  
-       wiphy_debug(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n",
-                   __func__, changed, vif->addr);
+       wiphy_dbg(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n",
+                 __func__, changed, vif->addr);
  
        if (changed & BSS_CHANGED_BSSID) {
-               wiphy_debug(hw->wiphy, "%s: BSSID changed: %pM\n",
-                           __func__, info->bssid);
+               wiphy_dbg(hw->wiphy, "%s: BSSID changed: %pM\n",
+                         __func__, info->bssid);
                memcpy(vp->bssid, info->bssid, ETH_ALEN);
        }
  
        if (changed & BSS_CHANGED_ASSOC) {
-               wiphy_debug(hw->wiphy, "  ASSOC: assoc=%d aid=%d\n",
-                           info->assoc, info->aid);
+               wiphy_dbg(hw->wiphy, "  ASSOC: assoc=%d aid=%d\n",
+                         info->assoc, info->aid);
                vp->assoc = info->assoc;
                vp->aid = info->aid;
        }
  
        if (changed & BSS_CHANGED_BEACON_ENABLED) {
-               wiphy_debug(hw->wiphy, "  BCN EN: %d (BI=%u)\n",
-                           info->enable_beacon, info->beacon_int);
+               wiphy_dbg(hw->wiphy, "  BCN EN: %d (BI=%u)\n",
+                         info->enable_beacon, info->beacon_int);
                vp->bcn_en = info->enable_beacon;
                if (data->started &&
                    !hrtimer_is_queued(&data->beacon_timer.timer) &&
                        ieee80211_iterate_active_interfaces_atomic(
                                data->hw, IEEE80211_IFACE_ITER_NORMAL,
                                mac80211_hwsim_bcn_en_iter, &count);
-                       wiphy_debug(hw->wiphy, "  beaconing vifs remaining: %u",
-                                   count);
+                       wiphy_dbg(hw->wiphy, "  beaconing vifs remaining: %u",
+                                 count);
                        if (count == 0) {
                                tasklet_hrtimer_cancel(&data->beacon_timer);
                                data->beacon_int = 0;
        }
  
        if (changed & BSS_CHANGED_ERP_CTS_PROT) {
-               wiphy_debug(hw->wiphy, "  ERP_CTS_PROT: %d\n",
-                           info->use_cts_prot);
+               wiphy_dbg(hw->wiphy, "  ERP_CTS_PROT: %d\n",
+                         info->use_cts_prot);
        }
  
        if (changed & BSS_CHANGED_ERP_PREAMBLE) {
-               wiphy_debug(hw->wiphy, "  ERP_PREAMBLE: %d\n",
-                           info->use_short_preamble);
+               wiphy_dbg(hw->wiphy, "  ERP_PREAMBLE: %d\n",
+                         info->use_short_preamble);
        }
  
        if (changed & BSS_CHANGED_ERP_SLOT) {
-               wiphy_debug(hw->wiphy, "  ERP_SLOT: %d\n", info->use_short_slot);
+               wiphy_dbg(hw->wiphy, "  ERP_SLOT: %d\n", info->use_short_slot);
        }
  
        if (changed & BSS_CHANGED_HT) {
-               wiphy_debug(hw->wiphy, "  HT: op_mode=0x%x\n",
-                           info->ht_operation_mode);
+               wiphy_dbg(hw->wiphy, "  HT: op_mode=0x%x\n",
+                         info->ht_operation_mode);
        }
  
        if (changed & BSS_CHANGED_BASIC_RATES) {
-               wiphy_debug(hw->wiphy, "  BASIC_RATES: 0x%llx\n",
-                           (unsigned long long) info->basic_rates);
+               wiphy_dbg(hw->wiphy, "  BASIC_RATES: 0x%llx\n",
+                         (unsigned long long) info->basic_rates);
        }
  
        if (changed & BSS_CHANGED_TXPOWER)
-               wiphy_debug(hw->wiphy, "  TX Power: %d dBm\n", info->txpower);
+               wiphy_dbg(hw->wiphy, "  TX Power: %d dBm\n", info->txpower);
  }
  
  static int mac80211_hwsim_sta_add(struct ieee80211_hw *hw,
@@@ -1813,11 -1813,11 +1813,11 @@@ static int mac80211_hwsim_conf_tx
        struct ieee80211_vif *vif, u16 queue,
        const struct ieee80211_tx_queue_params *params)
  {
-       wiphy_debug(hw->wiphy,
-                   "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n",
-                   __func__, queue,
-                   params->txop, params->cw_min,
-                   params->cw_max, params->aifs);
+       wiphy_dbg(hw->wiphy,
+                 "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n",
+                 __func__, queue,
+                 params->txop, params->cw_min,
+                 params->cw_max, params->aifs);
        return 0;
  }
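
The mac80211_hwsim churn above is a mechanical switch from printk(KERN_DEBUG ...) and wiphy_debug() to pr_debug() and wiphy_dbg(), which typically route through dynamic debug and can be enabled per call site instead of always being emitted. A tiny sketch of the replacement forms (the function and values are illustrative):

	#include <linux/printk.h>
	#include <net/cfg80211.h>

	static void log_example(struct wiphy *wiphy, int val)
	{
		pr_debug("hwsim-style message, val=%d\n", val);
		wiphy_dbg(wiphy, "per-wiphy variant, val=%d\n", val);
	}
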
  
@@@ -1981,7 -1981,7 +1981,7 @@@ static void hw_scan_work(struct work_st
                        .aborted = false,
                };
  
-               wiphy_debug(hwsim->hw->wiphy, "hw scan complete\n");
+               wiphy_dbg(hwsim->hw->wiphy, "hw scan complete\n");
                ieee80211_scan_completed(hwsim->hw, &info);
                hwsim->hw_scan_request = NULL;
                hwsim->hw_scan_vif = NULL;
                return;
        }
  
-       wiphy_debug(hwsim->hw->wiphy, "hw scan %d MHz\n",
-                   req->channels[hwsim->scan_chan_idx]->center_freq);
+       wiphy_dbg(hwsim->hw->wiphy, "hw scan %d MHz\n",
+                 req->channels[hwsim->scan_chan_idx]->center_freq);
  
        hwsim->tmp_chan = req->channels[hwsim->scan_chan_idx];
        if (hwsim->tmp_chan->flags & (IEEE80211_CHAN_NO_IR |
@@@ -2060,7 -2060,7 +2060,7 @@@ static int mac80211_hwsim_hw_scan(struc
        memset(hwsim->survey_data, 0, sizeof(hwsim->survey_data));
        mutex_unlock(&hwsim->mutex);
  
-       wiphy_debug(hw->wiphy, "hwsim hw_scan request\n");
+       wiphy_dbg(hw->wiphy, "hwsim hw_scan request\n");
  
        ieee80211_queue_delayed_work(hwsim->hw, &hwsim->hw_scan, 0);
  
@@@ -2075,7 -2075,7 +2075,7 @@@ static void mac80211_hwsim_cancel_hw_sc
                .aborted = true,
        };
  
-       wiphy_debug(hw->wiphy, "hwsim cancel_hw_scan\n");
+       wiphy_dbg(hw->wiphy, "hwsim cancel_hw_scan\n");
  
        cancel_delayed_work_sync(&hwsim->hw_scan);
  
@@@ -2096,11 -2096,11 +2096,11 @@@ static void mac80211_hwsim_sw_scan(stru
        mutex_lock(&hwsim->mutex);
  
        if (hwsim->scanning) {
-               printk(KERN_DEBUG "two hwsim sw_scans detected!\n");
+               pr_debug("two hwsim sw_scans detected!\n");
                goto out;
        }
  
-       printk(KERN_DEBUG "hwsim sw_scan request, prepping stuff\n");
+       pr_debug("hwsim sw_scan request, prepping stuff\n");
  
        memcpy(hwsim->scan_addr, mac_addr, ETH_ALEN);
        hwsim->scanning = true;
@@@ -2117,7 -2117,7 +2117,7 @@@ static void mac80211_hwsim_sw_scan_comp
  
        mutex_lock(&hwsim->mutex);
  
-       printk(KERN_DEBUG "hwsim sw_scan_complete\n");
+       pr_debug("hwsim sw_scan_complete\n");
        hwsim->scanning = false;
        eth_zero_addr(hwsim->scan_addr);
  
@@@ -2131,7 -2131,7 +2131,7 @@@ static void hw_roc_start(struct work_st
  
        mutex_lock(&hwsim->mutex);
  
-       wiphy_debug(hwsim->hw->wiphy, "hwsim ROC begins\n");
+       wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC begins\n");
        hwsim->tmp_chan = hwsim->roc_chan;
        ieee80211_ready_on_channel(hwsim->hw);
  
@@@ -2151,7 -2151,7 +2151,7 @@@ static void hw_roc_done(struct work_str
        hwsim->tmp_chan = NULL;
        mutex_unlock(&hwsim->mutex);
  
-       wiphy_debug(hwsim->hw->wiphy, "hwsim ROC expired\n");
+       wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC expired\n");
  }
  
  static int mac80211_hwsim_roc(struct ieee80211_hw *hw,
        hwsim->roc_duration = duration;
        mutex_unlock(&hwsim->mutex);
  
-       wiphy_debug(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n",
-                   chan->center_freq, duration);
+       wiphy_dbg(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n",
+                 chan->center_freq, duration);
        ieee80211_queue_delayed_work(hw, &hwsim->roc_start, HZ/50);
  
        return 0;
@@@ -2190,7 -2190,7 +2190,7 @@@ static int mac80211_hwsim_croc(struct i
        hwsim->tmp_chan = NULL;
        mutex_unlock(&hwsim->mutex);
  
-       wiphy_debug(hw->wiphy, "hwsim ROC canceled\n");
+       wiphy_dbg(hw->wiphy, "hwsim ROC canceled\n");
  
        return 0;
  }
@@@ -2199,20 -2199,20 +2199,20 @@@ static int mac80211_hwsim_add_chanctx(s
                                      struct ieee80211_chanctx_conf *ctx)
  {
        hwsim_set_chanctx_magic(ctx);
-       wiphy_debug(hw->wiphy,
-                   "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
-                   ctx->def.chan->center_freq, ctx->def.width,
-                   ctx->def.center_freq1, ctx->def.center_freq2);
+       wiphy_dbg(hw->wiphy,
+                 "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+                 ctx->def.chan->center_freq, ctx->def.width,
+                 ctx->def.center_freq1, ctx->def.center_freq2);
        return 0;
  }
  
  static void mac80211_hwsim_remove_chanctx(struct ieee80211_hw *hw,
                                          struct ieee80211_chanctx_conf *ctx)
  {
-       wiphy_debug(hw->wiphy,
-                   "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
-                   ctx->def.chan->center_freq, ctx->def.width,
-                   ctx->def.center_freq1, ctx->def.center_freq2);
+       wiphy_dbg(hw->wiphy,
+                 "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+                 ctx->def.chan->center_freq, ctx->def.width,
+                 ctx->def.center_freq1, ctx->def.center_freq2);
        hwsim_check_chanctx_magic(ctx);
        hwsim_clear_chanctx_magic(ctx);
  }
@@@ -2222,10 -2222,10 +2222,10 @@@ static void mac80211_hwsim_change_chanc
                                          u32 changed)
  {
        hwsim_check_chanctx_magic(ctx);
-       wiphy_debug(hw->wiphy,
-                   "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
-                   ctx->def.chan->center_freq, ctx->def.width,
-                   ctx->def.center_freq1, ctx->def.center_freq2);
+       wiphy_dbg(hw->wiphy,
+                 "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+                 ctx->def.chan->center_freq, ctx->def.width,
+                 ctx->def.center_freq1, ctx->def.center_freq2);
  }
  
  static int mac80211_hwsim_assign_vif_chanctx(struct ieee80211_hw *hw,
@@@ -2479,7 -2479,7 +2479,7 @@@ static int mac80211_hwsim_new_radio(str
                ops = &mac80211_hwsim_mchan_ops;
        hw = ieee80211_alloc_hw_nm(sizeof(*data), ops, param->hwname);
        if (!hw) {
-               printk(KERN_DEBUG "mac80211_hwsim: ieee80211_alloc_hw failed\n");
+               pr_debug("mac80211_hwsim: ieee80211_alloc_hw failed\n");
                err = -ENOMEM;
                goto failed;
        }
        data->dev->driver = &mac80211_hwsim_driver.driver;
        err = device_bind_driver(data->dev);
        if (err != 0) {
-               printk(KERN_DEBUG "mac80211_hwsim: device_bind_driver failed (%d)\n",
+               pr_debug("mac80211_hwsim: device_bind_driver failed (%d)\n",
                       err);
                goto failed_bind;
        }
  
        err = ieee80211_register_hw(hw);
        if (err < 0) {
-               printk(KERN_DEBUG "mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
+               pr_debug("mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
                       err);
                goto failed_hw;
        }
  
-       wiphy_debug(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr);
+       wiphy_dbg(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr);
  
        if (param->reg_alpha2) {
                data->alpha2[0] = param->reg_alpha2[0];
@@@ -3067,7 -3067,7 +3067,7 @@@ static int hwsim_cloned_frame_received_
  
        return 0;
  err:
-       printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+       pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
  out:
        dev_kfree_skb(skb);
        return -EINVAL;
@@@ -3098,7 -3098,7 +3098,7 @@@ static int hwsim_register_received_nl(s
  
        hwsim_register_wmediumd(net, info->snd_portid);
  
-       printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, "
+       pr_debug("mac80211_hwsim: received a REGISTER, "
               "switching to wmediumd mode with pid %d\n", info->snd_portid);
  
        return 0;
@@@ -3387,7 -3387,7 +3387,7 @@@ static int __init hwsim_init_netlink(vo
        return 0;
  
  failure:
-       printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+       pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
        return -EINVAL;
  }
  
@@@ -3578,7 -3578,7 +3578,7 @@@ module_init(init_mac80211_hwsim)
  
  static void __exit exit_mac80211_hwsim(void)
  {
-       printk(KERN_DEBUG "mac80211_hwsim: unregister radios\n");
+       pr_debug("mac80211_hwsim: unregister radios\n");
  
        hwsim_exit_netlink();
  
index 61cf3e9c0acb80683d60e27802b259a568b0a6ac,457a4b4e82120639a68e2edc17e76262c7b89a86..49b9efeba1bda1e2390289b8ba536fa7bad0542c
@@@ -52,7 -52,6 +52,6 @@@ EXPORT_SYMBOL_GPL(qeth_core_header_cach
  static struct kmem_cache *qeth_qdio_outbuf_cache;
  
  static struct device *qeth_core_root_dev;
- static unsigned int known_devices[][6] = QETH_MODELLIST_ARRAY;
  static struct lock_class_key qdio_out_skb_queue_key;
  static struct mutex qeth_mod_mutex;
  
@@@ -1386,6 -1385,7 +1385,7 @@@ static void qeth_init_qdio_info(struct 
        QETH_DBF_TEXT(SETUP, 4, "intqdinf");
        atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED);
        /* inbound */
+       card->qdio.no_in_queues = 1;
        card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT;
        if (card->info.type == QETH_CARD_TYPE_IQD)
                card->qdio.init_pool.buf_count = QETH_IN_BUF_COUNT_HSDEFAULT;
        return NULL;
  }
  
- static int qeth_determine_card_type(struct qeth_card *card)
+ static void qeth_determine_card_type(struct qeth_card *card)
  {
-       int i = 0;
        QETH_DBF_TEXT(SETUP, 2, "detcdtyp");
  
        card->qdio.do_prio_queueing = QETH_PRIOQ_DEFAULT;
        card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
-       while (known_devices[i][QETH_DEV_MODEL_IND]) {
-               if ((CARD_RDEV(card)->id.dev_type ==
-                               known_devices[i][QETH_DEV_TYPE_IND]) &&
-                   (CARD_RDEV(card)->id.dev_model ==
-                               known_devices[i][QETH_DEV_MODEL_IND])) {
-                       card->info.type = known_devices[i][QETH_DEV_MODEL_IND];
-                       card->qdio.no_out_queues =
-                               known_devices[i][QETH_QUEUE_NO_IND];
-                       card->qdio.no_in_queues = 1;
-                       card->info.is_multicast_different =
-                               known_devices[i][QETH_MULTICAST_IND];
-                       qeth_update_from_chp_desc(card);
-                       return 0;
-               }
-               i++;
-       }
-       card->info.type = QETH_CARD_TYPE_UNKNOWN;
-       dev_err(&card->gdev->dev, "The adapter hardware is of an "
-               "unknown type\n");
-       return -ENOENT;
+       card->info.type = CARD_RDEV(card)->id.driver_info;
+       card->qdio.no_out_queues = QETH_MAX_QUEUES;
+       if (card->info.type == QETH_CARD_TYPE_IQD)
+               card->info.is_multicast_different = 0x0103;
+       qeth_update_from_chp_desc(card);
  }
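
With the known_devices[] model table gone, the card type is taken straight from the ccw device id that matched at probe time. A hedged sketch of the wiring this relies on; the real table is qeth_ids[] elsewhere in qeth_core_main.c and the exact device/model numbers shown here are illustrative:

    static struct ccw_device_id qeth_ids[] = {
    	/* ->driver_info is what CARD_RDEV(card)->id.driver_info reports back */
    	{ CCW_DEVICE_DEVTYPE(0x1731, 0x01, 0x1732, 0x01),
    	  .driver_info = QETH_CARD_TYPE_OSD },
    	{ CCW_DEVICE_DEVTYPE(0x1731, 0x05, 0x1732, 0x05),
    	  .driver_info = QETH_CARD_TYPE_IQD },
    	/* ... OSN/OSM/OSX entries elided ... */
    	{},
    };
    MODULE_DEVICE_TABLE(ccw, qeth_ids);

Since the id table can only match known models, the QETH_CARD_TYPE_UNKNOWN fallback and the -ENOENT error path become dead code, which is why qeth_determine_card_type() can now return void.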
  
  static int qeth_clear_channel(struct qeth_channel *channel)
@@@ -2090,7 -2073,6 +2073,6 @@@ int qeth_send_control_data(struct qeth_
        spin_lock_irqsave(&card->lock, flags);
        list_add_tail(&reply->list, &card->cmd_waiter_list);
        spin_unlock_irqrestore(&card->lock, flags);
-       QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN);
  
        while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ;
        qeth_prepare_control_data(card, len, iob);
@@@ -2233,23 -2215,15 +2215,15 @@@ static int qeth_cm_setup(struct qeth_ca
  static int qeth_get_initial_mtu_for_card(struct qeth_card *card)
  {
        switch (card->info.type) {
-       case QETH_CARD_TYPE_UNKNOWN:
-               return 1500;
        case QETH_CARD_TYPE_IQD:
                return card->info.max_mtu;
        case QETH_CARD_TYPE_OSD:
-               switch (card->info.link_type) {
-               case QETH_LINK_TYPE_HSTR:
-               case QETH_LINK_TYPE_LANE_TR:
-                       return 2000;
-               default:
-                       return card->options.layer2 ? 1500 : 1492;
-               }
-       case QETH_CARD_TYPE_OSM:
        case QETH_CARD_TYPE_OSX:
-               return card->options.layer2 ? 1500 : 1492;
+               if (!card->options.layer2)
+                       return ETH_DATA_LEN - 8; /* L3: allow for LLC + SNAP */
+               /* fall through */
        default:
-               return 1500;
+               return ETH_DATA_LEN;
        }
  }
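
The numbers here are simple arithmetic: ETH_DATA_LEN is 1500, and the 8 bytes subtracted in the layer-3 case leave room for the 3-byte LLC plus 5-byte SNAP header, giving the familiar 1492. A stand-alone illustration (user-space only, just to make the constants explicit):

    #include <stdio.h>

    #define ETH_DATA_LEN	1500	/* as defined in <linux/if_ether.h> */

    int main(void)
    {
    	int l2_mtu = ETH_DATA_LEN;	/* layer2 and the default case */
    	int l3_mtu = ETH_DATA_LEN - 8;	/* OSD/OSX layer3: LLC (3) + SNAP (5) */

    	printf("layer2 MTU %d, layer3 MTU %d\n", l2_mtu, l3_mtu);	/* 1500, 1492 */
    	return 0;
    }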
  
@@@ -2279,7 -2253,6 +2253,6 @@@ static int qeth_mtu_is_valid(struct qet
                return ((mtu >= 576) &&
                        (mtu <= card->info.max_mtu));
        case QETH_CARD_TYPE_OSN:
-       case QETH_CARD_TYPE_UNKNOWN:
        default:
                return 1;
        }
@@@ -4040,35 -4013,23 +4013,23 @@@ static int qeth_fill_buffer(struct qeth
        return flush_cnt;
  }
  
- int qeth_do_send_packet_fast(struct qeth_card *card,
-                            struct qeth_qdio_out_q *queue, struct sk_buff *skb,
+ int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb,
                             struct qeth_hdr *hdr, unsigned int offset,
                             unsigned int hd_len)
  {
-       struct qeth_qdio_out_buffer *buffer;
-       int index;
+       int index = queue->next_buf_to_fill;
+       struct qeth_qdio_out_buffer *buffer = queue->bufs[index];
  
-       /* spin until we get the queue ... */
-       while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
-                             QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
-       /* ... now we've got the queue */
-       index = queue->next_buf_to_fill;
-       buffer = queue->bufs[queue->next_buf_to_fill];
        /*
         * check if buffer is empty to make sure that we do not 'overtake'
         * ourselves and try to fill a buffer that is already primed
         */
        if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
-               goto out;
-       queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) %
-                                         QDIO_MAX_BUFFERS_PER_Q;
-       atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
+               return -EBUSY;
+       queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q;
        qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len);
        qeth_flush_buffers(queue, index, 1);
        return 0;
- out:
-       atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
-       return -EBUSY;
  }
  EXPORT_SYMBOL_GPL(qeth_do_send_packet_fast);
  
@@@ -4923,7 -4884,6 +4884,6 @@@ static void qeth_qdio_establish_cq(stru
        if (card->options.cq == QETH_CQ_ENABLED) {
                int offset = QDIO_MAX_BUFFERS_PER_Q *
                             (card->qdio.no_in_queues - 1);
-               i = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1);
                for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
                        in_sbal_ptrs[offset + i] = (struct qdio_buffer *)
                                virt_to_phys(card->qdio.c_q->bufs[i].buffer);
  }
  EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
  
- static int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer,
-                               struct qdio_buffer_element *element,
-                               struct sk_buff **pskb, int offset, int *pfrag,
-                               int data_len)
+ static void qeth_create_skb_frag(struct qdio_buffer_element *element,
+                                struct sk_buff *skb, int offset, int data_len)
  {
        struct page *page = virt_to_page(element->addr);
-       if (*pskb == NULL) {
-               if (qethbuffer->rx_skb) {
-                       /* only if qeth_card.options.cq == QETH_CQ_ENABLED */
-                       *pskb = qethbuffer->rx_skb;
-                       qethbuffer->rx_skb = NULL;
-               } else {
-                       *pskb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
-                       if (!(*pskb))
-                               return -ENOMEM;
-               }
+       unsigned int next_frag;
  
-               skb_reserve(*pskb, ETH_HLEN);
-               if (data_len <= QETH_RX_PULL_LEN) {
-                       skb_put_data(*pskb, element->addr + offset, data_len);
-               } else {
-                       get_page(page);
-                       skb_put_data(*pskb, element->addr + offset,
-                                    QETH_RX_PULL_LEN);
-                       skb_fill_page_desc(*pskb, *pfrag, page,
-                               offset + QETH_RX_PULL_LEN,
-                               data_len - QETH_RX_PULL_LEN);
-                       (*pskb)->data_len += data_len - QETH_RX_PULL_LEN;
-                       (*pskb)->len      += data_len - QETH_RX_PULL_LEN;
-                       (*pskb)->truesize += data_len - QETH_RX_PULL_LEN;
-                       (*pfrag)++;
-               }
-       } else {
-               get_page(page);
-               skb_fill_page_desc(*pskb, *pfrag, page, offset, data_len);
-               (*pskb)->data_len += data_len;
-               (*pskb)->len      += data_len;
-               (*pskb)->truesize += data_len;
-               (*pfrag)++;
-       }
+       /* first fill the linear space */
+       if (!skb->len) {
+               unsigned int linear = min(data_len, skb_tailroom(skb));
  
+               skb_put_data(skb, element->addr + offset, linear);
+               data_len -= linear;
+               if (!data_len)
+                       return;
+               offset += linear;
+               /* fall through to add page frag for remaining data */
+       }
  
-       return 0;
+       next_frag = skb_shinfo(skb)->nr_frags;
+       get_page(page);
+       skb_add_rx_frag(skb, next_frag, page, offset, data_len, data_len);
  }
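
The open-coded len/data_len/truesize updates can go away because skb_add_rx_frag() already does that bookkeeping. Roughly, the core helper looks like this (a simplified sketch of net/core/skbuff.c, shown only for context):

    void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
    		     int off, int size, unsigned int truesize)
    {
    	skb_fill_page_desc(skb, i, page, off, size);	/* attach the page fragment */
    	skb->len      += size;
    	skb->data_len += size;				/* paged data, not linear */
    	skb->truesize += truesize;
    }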
  
  static inline int qeth_is_last_sbale(struct qdio_buffer_element *sbale)
@@@ -5267,22 -5205,19 +5205,19 @@@ struct sk_buff *qeth_core_get_next_skb(
        struct qdio_buffer_element *element = *__element;
        struct qdio_buffer *buffer = qethbuffer->buffer;
        int offset = *__offset;
-       struct sk_buff *skb = NULL;
+       struct sk_buff *skb;
        int skb_len = 0;
        void *data_ptr;
        int data_len;
        int headroom = 0;
        int use_rx_sg = 0;
-       int frag = 0;
  
        /* qeth_hdr must not cross element boundaries */
-       if (element->length < offset + sizeof(struct qeth_hdr)) {
+       while (element->length < offset + sizeof(struct qeth_hdr)) {
                if (qeth_is_last_sbale(element))
                        return NULL;
                element++;
                offset = 0;
-               if (element->length < sizeof(struct qeth_hdr))
-                       return NULL;
        }
        *hdr = element->addr + offset;
  
        if (((skb_len >= card->options.rx_sg_cb) &&
             (!(card->info.type == QETH_CARD_TYPE_OSN)) &&
             (!atomic_read(&card->force_alloc_skb))) ||
-           (card->options.cq == QETH_CQ_ENABLED)) {
+           (card->options.cq == QETH_CQ_ENABLED))
                use_rx_sg = 1;
+       if (use_rx_sg && qethbuffer->rx_skb) {
+               /* QETH_CQ_ENABLED only: */
+               skb = qethbuffer->rx_skb;
+               qethbuffer->rx_skb = NULL;
        } else {
-               skb = dev_alloc_skb(skb_len + headroom);
-               if (!skb)
-                       goto no_mem;
-               if (headroom)
-                       skb_reserve(skb, headroom);
+               unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
+               skb = dev_alloc_skb(linear + headroom);
        }
+       if (!skb)
+               goto no_mem;
+       if (headroom)
+               skb_reserve(skb, headroom);
  
        data_ptr = element->addr + offset;
        while (skb_len) {
                data_len = min(skb_len, (int)(element->length - offset));
                if (data_len) {
-                       if (use_rx_sg) {
-                               if (qeth_create_skb_frag(qethbuffer, element,
-                                   &skb, offset, &frag, data_len))
-                                       goto no_mem;
-                       } else {
+                       if (use_rx_sg)
+                               qeth_create_skb_frag(element, skb, offset,
+                                                    data_len);
+                       else
                                skb_put_data(skb, data_ptr, data_len);
-                       }
                }
                skb_len -= data_len;
                if (skb_len) {
@@@ -5429,7 -5369,7 +5369,7 @@@ int qeth_poll(struct napi_struct *napi
                }
        }
  
-       napi_complete(napi);
+       napi_complete_done(napi, work_done);
        if (qdio_start_irq(card->data.ccwdev, 0))
                napi_schedule(&card->napi);
  out:
@@@ -5737,11 -5677,7 +5677,7 @@@ static int qeth_core_probe_device(struc
        gdev->cdev[1]->handler = qeth_irq;
        gdev->cdev[2]->handler = qeth_irq;
  
-       rc = qeth_determine_card_type(card);
-       if (rc) {
-               QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc);
-               goto err_card;
-       }
+       qeth_determine_card_type(card);
        rc = qeth_setup_card(card);
        if (rc) {
                QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc);
@@@ -5875,7 -5811,6 +5811,7 @@@ static struct ccwgroup_driver qeth_core
                .owner = THIS_MODULE,
                .name = "qeth",
        },
 +      .ccw_driver = &qeth_ccw_driver,
        .setup = qeth_core_probe_device,
        .remove = qeth_core_remove_device,
        .set_online = qeth_core_set_online,
@@@ -6417,32 -6352,29 +6353,29 @@@ static int qeth_set_ipa_tso(struct qeth
        return rc;
  }
  
- /* try to restore device features on a device after recovery */
- int qeth_recover_features(struct net_device *dev)
+ #define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO)
+ /**
+  * qeth_recover_features() - Restore device features after recovery
+  * @dev:      the recovering net_device
+  *
+  * Caller must hold rtnl lock.
+  */
+ void qeth_recover_features(struct net_device *dev)
  {
+       netdev_features_t features = dev->features;
        struct qeth_card *card = dev->ml_priv;
-       netdev_features_t recover = dev->features;
  
-       if (recover & NETIF_F_IP_CSUM) {
-               if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM))
-                       recover ^= NETIF_F_IP_CSUM;
-       }
-       if (recover & NETIF_F_RXCSUM) {
-               if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM))
-                       recover ^= NETIF_F_RXCSUM;
-       }
-       if (recover & NETIF_F_TSO) {
-               if (qeth_set_ipa_tso(card, 1))
-                       recover ^= NETIF_F_TSO;
-       }
-       if (recover == dev->features)
-               return 0;
+       /* force-off any feature that needs an IPA sequence.
+        * netdev_update_features() will restart them.
+        */
+       dev->features &= ~QETH_HW_FEATURES;
+       netdev_update_features(dev);
  
+       if (features == dev->features)
+               return;
        dev_warn(&card->gdev->dev,
                 "Device recovery failed to restore all offload features\n");
-       dev->features = recover;
-       return -EIO;
  }
  EXPORT_SYMBOL_GPL(qeth_recover_features);
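
The simplified recovery path leans on the core feature machinery: clearing the QETH_HW_FEATURES bits and calling netdev_update_features() makes the stack re-run the device's ndo_fix_features()/ndo_set_features() callbacks under the rtnl lock, and those callbacks issue the actual IPA commands. A hedged sketch of the callback wiring this assumes (qeth_fix_features() appears further down in this diff; qeth_set_features() is its counterpart elsewhere in the driver):

    static const struct net_device_ops qeth_l3_netdev_ops = {
    	/* ... */
    	.ndo_fix_features	= qeth_fix_features,	/* mask bits the card cannot offer right now */
    	.ndo_set_features	= qeth_set_features,	/* run the IPA sequences for the changed bits */
    };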
  
@@@ -6499,8 -6431,7 +6432,7 @@@ netdev_features_t qeth_fix_features(str
        /* if the card isn't up, remove features that require hw changes */
        if (card->state == CARD_STATE_DOWN ||
            card->state == CARD_STATE_RECOVER)
-               features = features & ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
-                                       NETIF_F_TSO);
+               features &= ~QETH_HW_FEATURES;
        QETH_DBF_HEX(SETUP, 2, &features, sizeof(features));
        return features;
  }
index 023eae69398c4c393ea18f698b4957c7ee7fbaf1,be12147315059730e4e7c2e0dc24d13e27beee7c..99fc06f0afc1b6c818ddc19b09ecdebc29d2bed2
@@@ -89,7 -89,7 +89,7 @@@ static inline void dql_queued(struct dq
  /* Returns how many objects can be queued, < 0 indicates over limit. */
  static inline int dql_avail(const struct dql *dql)
  {
 -      return ACCESS_ONCE(dql->adj_limit) - ACCESS_ONCE(dql->num_queued);
 +      return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued);
  }
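
The scattered ACCESS_ONCE() -> READ_ONCE() changes in this merge are mechanical: READ_ONCE()/WRITE_ONCE() are the current annotations for lockless single-copy accesses and, unlike ACCESS_ONCE(), also work on non-scalar types. A minimal generic sketch of the intended pairing (illustrative names, not dql code):

    struct limits {
    	int adj_limit;
    	int num_queued;
    };

    /* writer, already serialized by its caller */
    static void limits_update(struct limits *l, int newlim)
    {
    	WRITE_ONCE(l->adj_limit, newlim);
    }

    /* lockless reader: READ_ONCE() keeps the compiler from refetching,
     * tearing or fusing the loads, which is what dql_avail() above relies on */
    static int limits_avail(const struct limits *l)
    {
    	return READ_ONCE(l->adj_limit) - READ_ONCE(l->num_queued);
    }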
  
  /* Record number of completed objects and recalculate the limit. */
@@@ -99,7 -99,7 +99,7 @@@ void dql_completed(struct dql *dql, uns
  void dql_reset(struct dql *dql);
  
  /* Initialize dql state */
- int dql_init(struct dql *dql, unsigned hold_time);
+ void dql_init(struct dql *dql, unsigned int hold_time);
  
  #endif /* _KERNEL_ */
  
diff --combined include/linux/of.h
index 43b5034c6a1e0f4f931bc2ff60ee8fc50cdd8d4c,b32d418d011a977e511e95cbc9f9aa4df4394af6..d3dea1d1e3a95ff27dc56c14cc2dcd89565be1a6
@@@ -37,15 -37,9 +37,15 @@@ struct property 
        int     length;
        void    *value;
        struct property *next;
 +#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC)
        unsigned long _flags;
 +#endif
 +#if defined(CONFIG_OF_PROMTREE)
        unsigned int unique_id;
 +#endif
 +#if defined(CONFIG_OF_KOBJ)
        struct bin_attribute attr;
 +#endif
  };
  
  #if defined(CONFIG_SPARC)
@@@ -64,9 -58,7 +64,9 @@@ struct device_node 
        struct  device_node *parent;
        struct  device_node *child;
        struct  device_node *sibling;
 +#if defined(CONFIG_OF_KOBJ)
        struct  kobject kobj;
 +#endif
        unsigned long _flags;
        void    *data;
  #if defined(CONFIG_SPARC)
@@@ -111,17 -103,21 +111,17 @@@ extern struct kobj_type of_node_ktype
  extern const struct fwnode_operations of_fwnode_ops;
  static inline void of_node_init(struct device_node *node)
  {
 +#if defined(CONFIG_OF_KOBJ)
        kobject_init(&node->kobj, &of_node_ktype);
 +#endif
        node->fwnode.ops = &of_fwnode_ops;
  }
  
 -/* true when node is initialized */
 -static inline int of_node_is_initialized(struct device_node *node)
 -{
 -      return node && node->kobj.state_initialized;
 -}
 -
 -/* true when node is attached (i.e. present on sysfs) */
 -static inline int of_node_is_attached(struct device_node *node)
 -{
 -      return node && node->kobj.state_in_sysfs;
 -}
 +#if defined(CONFIG_OF_KOBJ)
 +#define of_node_kobj(n) (&(n)->kobj)
 +#else
 +#define of_node_kobj(n) NULL
 +#endif
  
  #ifdef CONFIG_OF_DYNAMIC
  extern struct device_node *of_node_get(struct device_node *node);
@@@ -207,7 -203,6 +207,7 @@@ static inline void of_node_clear_flag(s
        clear_bit(flag, &n->_flags);
  }
  
 +#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC)
  static inline int of_property_check_flag(struct property *p, unsigned long flag)
  {
        return test_bit(flag, &p->_flags);
@@@ -222,7 -217,6 +222,7 @@@ static inline void of_property_clear_fl
  {
        clear_bit(flag, &p->_flags);
  }
 +#endif
  
  extern struct device_node *__of_find_all_nodes(struct device_node *prev);
  extern struct device_node *of_find_all_nodes(struct device_node *prev);
@@@ -681,12 -675,6 +681,6 @@@ static inline int of_property_count_ele
        return -ENOSYS;
  }
  
- static inline int of_property_read_u32_index(const struct device_node *np,
-                       const char *propname, u32 index, u32 *out_value)
- {
-       return -ENOSYS;
- }
  static inline int of_property_read_u8_array(const struct device_node *np,
                        const char *propname, u8 *out_values, size_t sz)
  {
@@@ -713,16 -701,14 +707,14 @@@ static inline int of_property_read_u64_
        return -ENOSYS;
  }
  
- static inline int of_property_read_string(const struct device_node *np,
-                                         const char *propname,
-                                         const char **out_string)
+ static inline int of_property_read_u32_index(const struct device_node *np,
+                       const char *propname, u32 index, u32 *out_value)
  {
        return -ENOSYS;
  }
  
- static inline int of_property_read_string_helper(const struct device_node *np,
-                                                const char *propname,
-                                                const char **out_strs, size_t sz, int index)
+ static inline int of_property_read_u64_index(const struct device_node *np,
+                       const char *propname, u32 index, u64 *out_value)
  {
        return -ENOSYS;
  }
@@@ -750,12 -736,51 +742,51 @@@ static inline int of_n_size_cells(struc
        return 0;
  }
  
+ static inline int of_property_read_variable_u8_array(const struct device_node *np,
+                                       const char *propname, u8 *out_values,
+                                       size_t sz_min, size_t sz_max)
+ {
+       return -ENOSYS;
+ }
+ static inline int of_property_read_variable_u16_array(const struct device_node *np,
+                                       const char *propname, u16 *out_values,
+                                       size_t sz_min, size_t sz_max)
+ {
+       return -ENOSYS;
+ }
+ static inline int of_property_read_variable_u32_array(const struct device_node *np,
+                                       const char *propname,
+                                       u32 *out_values,
+                                       size_t sz_min,
+                                       size_t sz_max)
+ {
+       return -ENOSYS;
+ }
  static inline int of_property_read_u64(const struct device_node *np,
                                       const char *propname, u64 *out_value)
  {
        return -ENOSYS;
  }
  
+ static inline int of_property_read_variable_u64_array(const struct device_node *np,
+                                       const char *propname,
+                                       u64 *out_values,
+                                       size_t sz_min,
+                                       size_t sz_max)
+ {
+       return -ENOSYS;
+ }
+ static inline int of_property_read_string(const struct device_node *np,
+                                         const char *propname,
+                                         const char **out_string)
+ {
+       return -ENOSYS;
+ }
  static inline int of_property_match_string(const struct device_node *np,
                                           const char *propname,
                                           const char *string)
  {
        return -ENOSYS;
  }
  
+ static inline int of_property_read_string_helper(const struct device_node *np,
+                                                const char *propname,
+                                                const char **out_strs, size_t sz, int index)
+ {
+       return -ENOSYS;
+ }
  static inline struct device_node *of_parse_phandle(const struct device_node *np,
                                                   const char *phandle_name,
                                                   int index)
@@@ -1289,6 -1321,9 +1327,6 @@@ static inline int of_reconfig_get_state
  }
  #endif /* CONFIG_OF_DYNAMIC */
  
 -/* CONFIG_OF_RESOLVE api */
 -extern int of_resolve_phandles(struct device_node *tree);
 -
  /**
   * of_device_is_system_power_controller - Tells if system-power-controller is found for device_node
   * @np: Pointer to the given device_node
@@@ -1305,7 -1340,7 +1343,7 @@@ static inline bool of_device_is_system_
   */
  
  enum of_overlay_notify_action {
 -      OF_OVERLAY_PRE_APPLY,
 +      OF_OVERLAY_PRE_APPLY = 0,
        OF_OVERLAY_POST_APPLY,
        OF_OVERLAY_PRE_REMOVE,
        OF_OVERLAY_POST_REMOVE,
@@@ -1319,26 -1354,26 +1357,26 @@@ struct of_overlay_notify_data 
  #ifdef CONFIG_OF_OVERLAY
  
  /* ID based overlays; the API for external users */
 -int of_overlay_create(struct device_node *tree);
 -int of_overlay_destroy(int id);
 -int of_overlay_destroy_all(void);
 +int of_overlay_apply(struct device_node *tree, int *ovcs_id);
 +int of_overlay_remove(int *ovcs_id);
 +int of_overlay_remove_all(void);
  
  int of_overlay_notifier_register(struct notifier_block *nb);
  int of_overlay_notifier_unregister(struct notifier_block *nb);
  
  #else
  
 -static inline int of_overlay_create(struct device_node *tree)
 +static inline int of_overlay_apply(struct device_node *tree, int *ovcs_id)
  {
        return -ENOTSUPP;
  }
  
 -static inline int of_overlay_destroy(int id)
 +static inline int of_overlay_remove(int *ovcs_id)
  {
        return -ENOTSUPP;
  }
  
 -static inline int of_overlay_destroy_all(void)
 +static inline int of_overlay_remove_all(void)
  {
        return -ENOTSUPP;
  }
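
The overlay entry points change both name and calling convention: of_overlay_apply() takes an int * that receives the overlay changeset id instead of returning it, and of_overlay_remove()/of_overlay_remove_all() take the id by reference so it can be invalidated. A hedged sketch of a caller migrating to the new API (overlay_np and the surrounding error handling are illustrative):

    static int example_apply_then_remove(struct device_node *overlay_np)
    {
    	int ovcs_id = 0;
    	int ret;

    	ret = of_overlay_apply(overlay_np, &ovcs_id);	/* was: id = of_overlay_create(np) */
    	if (ret)
    		return ret;

    	/* ... use whatever the overlay added to the live tree ... */

    	return of_overlay_remove(&ovcs_id);		/* was: of_overlay_destroy(id) */
    }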
index 54bcd970bfd3c9586ac2be2d836ebe72f18261bf,d090d466cd0bbe09b5bc8141c6de55ac8fa13a56..2032ce2eb20bff492698a1309aa043470de0991f
@@@ -18,9 -18,11 +18,11 @@@ extern int rtnl_put_cacheinfo(struct sk
                              u32 id, long expires, u32 error);
  
  void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
+ void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+                        gfp_t flags, int *new_nsid);
  struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
                                       unsigned change, u32 event,
-                                      gfp_t flags);
+                                      gfp_t flags, int *new_nsid);
  void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev,
                       gfp_t flags);
  
@@@ -68,7 -70,7 +70,7 @@@ static inline bool lockdep_rtnl_is_held
   * @p: The pointer to read, prior to dereferencing
   *
   * Return the value of the specified RCU-protected pointer, but omit
 - * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
 + * both the smp_read_barrier_depends() and the READ_ONCE(), because
   * caller holds RTNL.
   */
  #define rtnl_dereference(p)                                   \
index 470c1c71e7f4443e296f031e92d4743385a4610e,01570a8f298217bc08c8972e2202e0ce9f5739e3..fecc6112c768a7aa0acf711fd89130eaf53d7fd1
@@@ -312,6 -312,7 +312,7 @@@ struct nft_expr
   *    @flush: deactivate element in the next generation
   *    @remove: remove element from set
   *    @walk: iterate over all set elements
+  *    @get: get set elements
   *    @privsize: function to return size of set private data
   *    @init: initialize private data of new set instance
   *    @destroy: destroy private data of set instance
@@@ -351,6 -352,10 +352,10 @@@ struct nft_set_ops 
        void                            (*walk)(const struct nft_ctx *ctx,
                                                struct nft_set *set,
                                                struct nft_set_iter *iter);
+       void *                          (*get)(const struct net *net,
+                                              const struct nft_set *set,
+                                              const struct nft_set_elem *elem,
+                                              unsigned int flags);
  
        unsigned int                    (*privsize)(const struct nlattr * const nla[],
                                                    const struct nft_set_desc *desc);
@@@ -1165,8 -1170,8 +1170,8 @@@ static inline u8 nft_genmask_next(cons
  
  static inline u8 nft_genmask_cur(const struct net *net)
  {
 -      /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */
 -      return 1 << ACCESS_ONCE(net->nft.gencursor);
 +      /* Use READ_ONCE() to prevent refetching the value for atomicity */
 +      return 1 << READ_ONCE(net->nft.gencursor);
  }
  
  #define NFT_GENMASK_ANY               ((1 << 0) | (1 << 1))
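
For context on the READ_ONCE() here: nf_tables keeps a two-bit generation mask per object and a global gencursor that is either 0 or 1, so the current and the next generation each map to one bit. A short worked sketch of the arithmetic (semantics paraphrased, not taken from this hunk):

    /* with net->nft.gencursor == 0:
     *   nft_genmask_cur(net)  == 1 << 0 == 0x1   bit the packet path tests
     *   nft_genmask_next(net) == 1 << 1 == 0x2   bit a pending transaction uses
     *   NFT_GENMASK_ANY       == 0x3             visible in both generations
     * committing a transaction flips gencursor, swapping the two roles.
     */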
diff --combined kernel/events/core.c
index 4c39c05e029a7f357cb872ffa982d6ca2381fddb,42d24bd64ea4676ba740495f58504918c6db315d..3939a4674e0ae48395f290edc20d61f4c307c89b
@@@ -209,7 -209,7 +209,7 @@@ static int event_function(void *info
        struct perf_event_context *task_ctx = cpuctx->task_ctx;
        int ret = 0;
  
 -      WARN_ON_ONCE(!irqs_disabled());
 +      lockdep_assert_irqs_disabled();
  
        perf_ctx_lock(cpuctx, task_ctx);
        /*
@@@ -306,7 -306,7 +306,7 @@@ static void event_function_local(struc
        struct task_struct *task = READ_ONCE(ctx->task);
        struct perf_event_context *task_ctx = NULL;
  
 -      WARN_ON_ONCE(!irqs_disabled());
 +      lockdep_assert_irqs_disabled();
  
        if (task) {
                if (task == TASK_TOMBSTONE)
@@@ -582,88 -582,6 +582,88 @@@ static inline u64 perf_event_clock(stru
        return event->clock();
  }
  
 +/*
 + * State based event timekeeping...
 + *
 + * The basic idea is to use event->state to determine which (if any) time
 + * fields to increment with the current delta. This means we only need to
 + * update timestamps when we change state or when they are explicitly requested
 + * (read).
 + *
 + * Event groups make things a little more complicated, but not terribly so. The
 + * rules for a group are that if the group leader is OFF the entire group is
 + * OFF, irrespective of what the group member states are. This results in
 + * __perf_effective_state().
 + *
 + * A further ramification is that when a group leader flips between OFF and
 + * !OFF, we need to update all group member times.
 + *
 + *
 + * NOTE: perf_event_time() is based on the (cgroup) context time, and thus we
 + * need to make sure the relevant context time is updated before we try and
 + * update our timestamps.
 + */
 +
 +static __always_inline enum perf_event_state
 +__perf_effective_state(struct perf_event *event)
 +{
 +      struct perf_event *leader = event->group_leader;
 +
 +      if (leader->state <= PERF_EVENT_STATE_OFF)
 +              return leader->state;
 +
 +      return event->state;
 +}
 +
 +static __always_inline void
 +__perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
 +{
 +      enum perf_event_state state = __perf_effective_state(event);
 +      u64 delta = now - event->tstamp;
 +
 +      *enabled = event->total_time_enabled;
 +      if (state >= PERF_EVENT_STATE_INACTIVE)
 +              *enabled += delta;
 +
 +      *running = event->total_time_running;
 +      if (state >= PERF_EVENT_STATE_ACTIVE)
 +              *running += delta;
 +}
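
A short worked example of the delta accounting (invented numbers): with the event INACTIVE since its last state change, only the enabled time keeps growing, while the running time stays frozen until the event becomes ACTIVE again.

    /* event->tstamp             = 100   time of the last state change
     * event->state              = PERF_EVENT_STATE_INACTIVE
     * event->total_time_enabled = 40
     * event->total_time_running = 25
     *
     * __perf_update_times(event, 108, &e, &r) computes delta = 8:
     *   e = 40 + 8 = 48     INACTIVE and above still counts as "enabled"
     *   r = 25              only ACTIVE accrues "running" time
     * A subsequent perf_event_set_state(event, ACTIVE) first folds those
     * 8 ticks into total_time_enabled and resets tstamp to 108.
     */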
 +
 +static void perf_event_update_time(struct perf_event *event)
 +{
 +      u64 now = perf_event_time(event);
 +
 +      __perf_update_times(event, now, &event->total_time_enabled,
 +                                      &event->total_time_running);
 +      event->tstamp = now;
 +}
 +
 +static void perf_event_update_sibling_time(struct perf_event *leader)
 +{
 +      struct perf_event *sibling;
 +
 +      list_for_each_entry(sibling, &leader->sibling_list, group_entry)
 +              perf_event_update_time(sibling);
 +}
 +
 +static void
 +perf_event_set_state(struct perf_event *event, enum perf_event_state state)
 +{
 +      if (event->state == state)
 +              return;
 +
 +      perf_event_update_time(event);
 +      /*
 +       * If a group leader gets enabled/disabled all its siblings
 +       * are affected too.
 +       */
 +      if ((event->state < 0) ^ (state < 0))
 +              perf_event_update_sibling_time(event);
 +
 +      WRITE_ONCE(event->state, state);
 +}
 +
  #ifdef CONFIG_CGROUP_PERF
  
  static inline bool
@@@ -923,6 -841,40 +923,6 @@@ perf_cgroup_set_shadow_time(struct perf
        event->shadow_ctx_time = now - t->timestamp;
  }
  
 -static inline void
 -perf_cgroup_defer_enabled(struct perf_event *event)
 -{
 -      /*
 -       * when the current task's perf cgroup does not match
 -       * the event's, we need to remember to call the
 -       * perf_mark_enable() function the first time a task with
 -       * a matching perf cgroup is scheduled in.
 -       */
 -      if (is_cgroup_event(event) && !perf_cgroup_match(event))
 -              event->cgrp_defer_enabled = 1;
 -}
 -
 -static inline void
 -perf_cgroup_mark_enabled(struct perf_event *event,
 -                       struct perf_event_context *ctx)
 -{
 -      struct perf_event *sub;
 -      u64 tstamp = perf_event_time(event);
 -
 -      if (!event->cgrp_defer_enabled)
 -              return;
 -
 -      event->cgrp_defer_enabled = 0;
 -
 -      event->tstamp_enabled = tstamp - event->total_time_enabled;
 -      list_for_each_entry(sub, &event->sibling_list, group_entry) {
 -              if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
 -                      sub->tstamp_enabled = tstamp - sub->total_time_enabled;
 -                      sub->cgrp_defer_enabled = 0;
 -              }
 -      }
 -}
 -
  /*
   * Update cpuctx->cgrp so that it is set when first cgroup event is added and
   * cleared when last cgroup event is removed.
@@@ -1022,6 -974,17 +1022,6 @@@ static inline u64 perf_cgroup_event_tim
        return 0;
  }
  
 -static inline void
 -perf_cgroup_defer_enabled(struct perf_event *event)
 -{
 -}
 -
 -static inline void
 -perf_cgroup_mark_enabled(struct perf_event *event,
 -                       struct perf_event_context *ctx)
 -{
 -}
 -
  static inline void
  list_update_cgroup_event(struct perf_event *event,
                         struct perf_event_context *ctx, bool add)
@@@ -1043,7 -1006,7 +1043,7 @@@ static enum hrtimer_restart perf_mux_hr
        struct perf_cpu_context *cpuctx;
        int rotations = 0;
  
 -      WARN_ON(!irqs_disabled());
 +      lockdep_assert_irqs_disabled();
  
        cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
        rotations = perf_rotate_context(cpuctx);
@@@ -1130,7 -1093,7 +1130,7 @@@ static void perf_event_ctx_activate(str
  {
        struct list_head *head = this_cpu_ptr(&active_ctx_list);
  
 -      WARN_ON(!irqs_disabled());
 +      lockdep_assert_irqs_disabled();
  
        WARN_ON(!list_empty(&ctx->active_ctx_list));
  
  
  static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
  {
 -      WARN_ON(!irqs_disabled());
 +      lockdep_assert_irqs_disabled();
  
        WARN_ON(list_empty(&ctx->active_ctx_list));
  
@@@ -1239,7 -1202,7 +1239,7 @@@ perf_event_ctx_lock_nested(struct perf_
  
  again:
        rcu_read_lock();
 -      ctx = ACCESS_ONCE(event->ctx);
 +      ctx = READ_ONCE(event->ctx);
        if (!atomic_inc_not_zero(&ctx->refcount)) {
                rcu_read_unlock();
                goto again;
@@@ -1435,6 -1398,60 +1435,6 @@@ static u64 perf_event_time(struct perf_
        return ctx ? ctx->time : 0;
  }
  
 -/*
 - * Update the total_time_enabled and total_time_running fields for a event.
 - */
 -static void update_event_times(struct perf_event *event)
 -{
 -      struct perf_event_context *ctx = event->ctx;
 -      u64 run_end;
 -
 -      lockdep_assert_held(&ctx->lock);
 -
 -      if (event->state < PERF_EVENT_STATE_INACTIVE ||
 -          event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
 -              return;
 -
 -      /*
 -       * in cgroup mode, time_enabled represents
 -       * the time the event was enabled AND active
 -       * tasks were in the monitored cgroup. This is
 -       * independent of the activity of the context as
 -       * there may be a mix of cgroup and non-cgroup events.
 -       *
 -       * That is why we treat cgroup events differently
 -       * here.
 -       */
 -      if (is_cgroup_event(event))
 -              run_end = perf_cgroup_event_time(event);
 -      else if (ctx->is_active)
 -              run_end = ctx->time;
 -      else
 -              run_end = event->tstamp_stopped;
 -
 -      event->total_time_enabled = run_end - event->tstamp_enabled;
 -
 -      if (event->state == PERF_EVENT_STATE_INACTIVE)
 -              run_end = event->tstamp_stopped;
 -      else
 -              run_end = perf_event_time(event);
 -
 -      event->total_time_running = run_end - event->tstamp_running;
 -
 -}
 -
 -/*
 - * Update total_time_enabled and total_time_running for all events in a group.
 - */
 -static void update_group_times(struct perf_event *leader)
 -{
 -      struct perf_event *event;
 -
 -      update_event_times(leader);
 -      list_for_each_entry(event, &leader->sibling_list, group_entry)
 -              update_event_times(event);
 -}
 -
  static enum event_type_t get_event_type(struct perf_event *event)
  {
        struct perf_event_context *ctx = event->ctx;
@@@ -1477,8 -1494,6 +1477,8 @@@ list_add_event(struct perf_event *event
        WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
        event->attach_state |= PERF_ATTACH_CONTEXT;
  
 +      event->tstamp = perf_event_time(event);
 +
        /*
         * If we're a stand alone event or group leader, we go to the context
         * list, group events are kept attached to the group so that
@@@ -1686,6 -1701,8 +1686,6 @@@ list_del_event(struct perf_event *event
        if (event->group_leader == event)
                list_del_init(&event->group_entry);
  
 -      update_group_times(event);
 -
        /*
         * If event was in error state, then keep it
         * that way, otherwise bogus counts will be
         * of the event
         */
        if (event->state > PERF_EVENT_STATE_OFF)
 -              event->state = PERF_EVENT_STATE_OFF;
 +              perf_event_set_state(event, PERF_EVENT_STATE_OFF);
  
        ctx->generation++;
  }
@@@ -1793,24 -1810,38 +1793,24 @@@ event_sched_out(struct perf_event *even
                  struct perf_cpu_context *cpuctx,
                  struct perf_event_context *ctx)
  {
 -      u64 tstamp = perf_event_time(event);
 -      u64 delta;
 +      enum perf_event_state state = PERF_EVENT_STATE_INACTIVE;
  
        WARN_ON_ONCE(event->ctx != ctx);
        lockdep_assert_held(&ctx->lock);
  
 -      /*
 -       * An event which could not be activated because of
 -       * filter mismatch still needs to have its timings
 -       * maintained, otherwise bogus information is return
 -       * via read() for time_enabled, time_running:
 -       */
 -      if (event->state == PERF_EVENT_STATE_INACTIVE &&
 -          !event_filter_match(event)) {
 -              delta = tstamp - event->tstamp_stopped;
 -              event->tstamp_running += delta;
 -              event->tstamp_stopped = tstamp;
 -      }
 -
        if (event->state != PERF_EVENT_STATE_ACTIVE)
                return;
  
        perf_pmu_disable(event->pmu);
  
 -      event->tstamp_stopped = tstamp;
        event->pmu->del(event, 0);
        event->oncpu = -1;
 -      event->state = PERF_EVENT_STATE_INACTIVE;
 +
        if (event->pending_disable) {
                event->pending_disable = 0;
 -              event->state = PERF_EVENT_STATE_OFF;
 +              state = PERF_EVENT_STATE_OFF;
        }
 +      perf_event_set_state(event, state);
  
        if (!is_software_event(event))
                cpuctx->active_oncpu--;
@@@ -1830,9 -1861,7 +1830,9 @@@ group_sched_out(struct perf_event *grou
                struct perf_event_context *ctx)
  {
        struct perf_event *event;
 -      int state = group_event->state;
 +
 +      if (group_event->state != PERF_EVENT_STATE_ACTIVE)
 +              return;
  
        perf_pmu_disable(ctx->pmu);
  
  
        perf_pmu_enable(ctx->pmu);
  
 -      if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
 +      if (group_event->attr.exclusive)
                cpuctx->exclusive = 0;
  }
  
@@@ -1866,11 -1895,6 +1866,11 @@@ __perf_remove_from_context(struct perf_
  {
        unsigned long flags = (unsigned long)info;
  
 +      if (ctx->is_active & EVENT_TIME) {
 +              update_context_time(ctx);
 +              update_cgrp_time_from_cpuctx(cpuctx);
 +      }
 +
        event_sched_out(event, cpuctx, ctx);
        if (flags & DETACH_GROUP)
                perf_group_detach(event);
@@@ -1933,17 -1957,14 +1933,17 @@@ static void __perf_event_disable(struc
        if (event->state < PERF_EVENT_STATE_INACTIVE)
                return;
  
 -      update_context_time(ctx);
 -      update_cgrp_time_from_event(event);
 -      update_group_times(event);
 +      if (ctx->is_active & EVENT_TIME) {
 +              update_context_time(ctx);
 +              update_cgrp_time_from_event(event);
 +      }
 +
        if (event == event->group_leader)
                group_sched_out(event, cpuctx, ctx);
        else
                event_sched_out(event, cpuctx, ctx);
 -      event->state = PERF_EVENT_STATE_OFF;
 +
 +      perf_event_set_state(event, PERF_EVENT_STATE_OFF);
  }
  
  /*
@@@ -2000,7 -2021,8 +2000,7 @@@ void perf_event_disable_inatomic(struc
  }
  
  static void perf_set_shadow_time(struct perf_event *event,
 -                               struct perf_event_context *ctx,
 -                               u64 tstamp)
 +                               struct perf_event_context *ctx)
  {
        /*
         * use the correct time source for the time snapshot
         * is cleaner and simpler to understand.
         */
        if (is_cgroup_event(event))
 -              perf_cgroup_set_shadow_time(event, tstamp);
 +              perf_cgroup_set_shadow_time(event, event->tstamp);
        else
 -              event->shadow_ctx_time = tstamp - ctx->timestamp;
 +              event->shadow_ctx_time = event->tstamp - ctx->timestamp;
  }
  
  #define MAX_INTERRUPTS (~0ULL)
@@@ -2043,6 -2065,7 +2043,6 @@@ event_sched_in(struct perf_event *event
                 struct perf_cpu_context *cpuctx,
                 struct perf_event_context *ctx)
  {
 -      u64 tstamp = perf_event_time(event);
        int ret = 0;
  
        lockdep_assert_held(&ctx->lock);
  
        WRITE_ONCE(event->oncpu, smp_processor_id());
        /*
 -       * Order event::oncpu write to happen before the ACTIVE state
 -       * is visible.
 +       * Order event::oncpu write to happen before the ACTIVE state is
 +       * visible. This allows perf_event_{stop,read}() to observe the correct
 +       * ->oncpu if it sees ACTIVE.
         */
        smp_wmb();
 -      WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
 +      perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE);
  
        /*
         * Unthrottle events, since we scheduled we might have missed several
                event->hw.interrupts = 0;
        }
  
 -      /*
 -       * The new state must be visible before we turn it on in the hardware:
 -       */
 -      smp_wmb();
 -
        perf_pmu_disable(event->pmu);
  
 -      perf_set_shadow_time(event, ctx, tstamp);
 +      perf_set_shadow_time(event, ctx);
  
        perf_log_itrace_start(event);
  
        if (event->pmu->add(event, PERF_EF_START)) {
 -              event->state = PERF_EVENT_STATE_INACTIVE;
 +              perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
                event->oncpu = -1;
                ret = -EAGAIN;
                goto out;
        }
  
 -      event->tstamp_running += tstamp - event->tstamp_stopped;
 -
        if (!is_software_event(event))
                cpuctx->active_oncpu++;
        if (!ctx->nr_active++)
@@@ -2105,6 -2134,8 +2105,6 @@@ group_sched_in(struct perf_event *group
  {
        struct perf_event *event, *partial_group = NULL;
        struct pmu *pmu = ctx->pmu;
 -      u64 now = ctx->time;
 -      bool simulate = false;
  
        if (group_event->state == PERF_EVENT_STATE_OFF)
                return 0;
@@@ -2134,13 -2165,27 +2134,13 @@@ group_error
        /*
         * Groups can be scheduled in as one unit only, so undo any
         * partial group before returning:
 -       * The events up to the failed event are scheduled out normally,
 -       * tstamp_stopped will be updated.
 -       *
 -       * The failed events and the remaining siblings need to have
 -       * their timings updated as if they had gone thru event_sched_in()
 -       * and event_sched_out(). This is required to get consistent timings
 -       * across the group. This also takes care of the case where the group
 -       * could never be scheduled by ensuring tstamp_stopped is set to mark
 -       * the time the event was actually stopped, such that time delta
 -       * calculation in update_event_times() is correct.
 +       * The events up to the failed event are scheduled out normally.
         */
        list_for_each_entry(event, &group_event->sibling_list, group_entry) {
                if (event == partial_group)
 -                      simulate = true;
 +                      break;
  
 -              if (simulate) {
 -                      event->tstamp_running += now - event->tstamp_stopped;
 -                      event->tstamp_stopped = now;
 -              } else {
 -                      event_sched_out(event, cpuctx, ctx);
 -              }
 +              event_sched_out(event, cpuctx, ctx);
        }
        event_sched_out(group_event, cpuctx, ctx);
  
@@@ -2182,11 -2227,46 +2182,11 @@@ static int group_can_go_on(struct perf_
        return can_add_hw;
  }
  
 -/*
 - * Complement to update_event_times(). This computes the tstamp_* values to
 - * continue 'enabled' state from @now, and effectively discards the time
 - * between the prior tstamp_stopped and now (as we were in the OFF state, or
 - * just switched (context) time base).
 - *
 - * This further assumes '@event->state == INACTIVE' (we just came from OFF) and
 - * cannot have been scheduled in yet. And going into INACTIVE state means
 - * '@event->tstamp_stopped = @now'.
 - *
 - * Thus given the rules of update_event_times():
 - *
 - *   total_time_enabled = tstamp_stopped - tstamp_enabled
 - *   total_time_running = tstamp_stopped - tstamp_running
 - *
 - * We can insert 'tstamp_stopped == now' and reverse them to compute new
 - * tstamp_* values.
 - */
 -static void __perf_event_enable_time(struct perf_event *event, u64 now)
 -{
 -      WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE);
 -
 -      event->tstamp_stopped = now;
 -      event->tstamp_enabled = now - event->total_time_enabled;
 -      event->tstamp_running = now - event->total_time_running;
 -}
 -
  static void add_event_to_ctx(struct perf_event *event,
                               struct perf_event_context *ctx)
  {
 -      u64 tstamp = perf_event_time(event);
 -
        list_add_event(event, ctx);
        perf_group_attach(event);
 -      /*
 -       * We can be called with event->state == STATE_OFF when we create with
 -       * .disabled = 1. In that case the IOC_ENABLE will call this function.
 -       */
 -      if (event->state == PERF_EVENT_STATE_INACTIVE)
 -              __perf_event_enable_time(event, tstamp);
  }
  
  static void ctx_sched_out(struct perf_event_context *ctx,
@@@ -2417,6 -2497,28 +2417,6 @@@ again
        raw_spin_unlock_irq(&ctx->lock);
  }
  
 -/*
 - * Put a event into inactive state and update time fields.
 - * Enabling the leader of a group effectively enables all
 - * the group members that aren't explicitly disabled, so we
 - * have to update their ->tstamp_enabled also.
 - * Note: this works for group members as well as group leaders
 - * since the non-leader members' sibling_lists will be empty.
 - */
 -static void __perf_event_mark_enabled(struct perf_event *event)
 -{
 -      struct perf_event *sub;
 -      u64 tstamp = perf_event_time(event);
 -
 -      event->state = PERF_EVENT_STATE_INACTIVE;
 -      __perf_event_enable_time(event, tstamp);
 -      list_for_each_entry(sub, &event->sibling_list, group_entry) {
 -              /* XXX should not be > INACTIVE if event isn't */
 -              if (sub->state >= PERF_EVENT_STATE_INACTIVE)
 -                      __perf_event_enable_time(sub, tstamp);
 -      }
 -}
 -
  /*
   * Cross CPU call to enable a performance event
   */
@@@ -2435,12 -2537,14 +2435,12 @@@ static void __perf_event_enable(struct 
        if (ctx->is_active)
                ctx_sched_out(ctx, cpuctx, EVENT_TIME);
  
 -      __perf_event_mark_enabled(event);
 +      perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
  
        if (!ctx->is_active)
                return;
  
        if (!event_filter_match(event)) {
 -              if (is_cgroup_event(event))
 -                      perf_cgroup_defer_enabled(event);
                ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
                return;
        }
@@@ -2760,10 -2864,18 +2760,10 @@@ static void __perf_event_sync_stat(stru
         * we know the event must be on the current CPU, therefore we
         * don't need to use it.
         */
 -      switch (event->state) {
 -      case PERF_EVENT_STATE_ACTIVE:
 +      if (event->state == PERF_EVENT_STATE_ACTIVE)
                event->pmu->read(event);
 -              /* fall-through */
  
 -      case PERF_EVENT_STATE_INACTIVE:
 -              update_event_times(event);
 -              break;
 -
 -      default:
 -              break;
 -      }
 +      perf_event_update_time(event);
  
        /*
         * In order to keep per-task stats reliable we need to flip the event
@@@ -3000,6 -3112,10 +3000,6 @@@ ctx_pinned_sched_in(struct perf_event_c
                if (!event_filter_match(event))
                        continue;
  
 -              /* may need to reset tstamp_enabled */
 -              if (is_cgroup_event(event))
 -                      perf_cgroup_mark_enabled(event, ctx);
 -
                if (group_can_go_on(event, cpuctx, 1))
                        group_sched_in(event, cpuctx, ctx);
  
                 * If this pinned group hasn't been scheduled,
                 * put it in error state.
                 */
 -              if (event->state == PERF_EVENT_STATE_INACTIVE) {
 -                      update_group_times(event);
 -                      event->state = PERF_EVENT_STATE_ERROR;
 -              }
 +              if (event->state == PERF_EVENT_STATE_INACTIVE)
 +                      perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
        }
  }
  
@@@ -3030,6 -3148,10 +3030,6 @@@ ctx_flexible_sched_in(struct perf_event
                if (!event_filter_match(event))
                        continue;
  
 -              /* may need to reset tstamp_enabled */
 -              if (is_cgroup_event(event))
 -                      perf_cgroup_mark_enabled(event, ctx);
 -
                if (group_can_go_on(event, cpuctx, can_add_hw)) {
                        if (group_sched_in(event, cpuctx, ctx))
                                can_add_hw = 0;
@@@ -3401,7 -3523,7 +3401,7 @@@ void perf_event_task_tick(void
        struct perf_event_context *ctx, *tmp;
        int throttled;
  
 -      WARN_ON(!irqs_disabled());
 +      lockdep_assert_irqs_disabled();
  
        __this_cpu_inc(perf_throttled_seq);
        throttled = __this_cpu_xchg(perf_throttled_count, 0);
@@@ -3421,7 -3543,7 +3421,7 @@@ static int event_enable_on_exec(struct 
        if (event->state >= PERF_EVENT_STATE_INACTIVE)
                return 0;
  
 -      __perf_event_mark_enabled(event);
 +      perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
  
        return 1;
  }
@@@ -3515,15 -3637,12 +3515,15 @@@ static void __perf_event_read(void *inf
                return;
  
        raw_spin_lock(&ctx->lock);
 -      if (ctx->is_active) {
 +      if (ctx->is_active & EVENT_TIME) {
                update_context_time(ctx);
                update_cgrp_time_from_event(event);
        }
  
 -      update_event_times(event);
 +      perf_event_update_time(event);
 +      if (data->group)
 +              perf_event_update_sibling_time(event);
 +
        if (event->state != PERF_EVENT_STATE_ACTIVE)
                goto unlock;
  
        pmu->read(event);
  
        list_for_each_entry(sub, &event->sibling_list, group_entry) {
 -              update_event_times(sub);
                if (sub->state == PERF_EVENT_STATE_ACTIVE) {
                        /*
                         * Use sibling's PMU rather than @event's since
@@@ -3571,6 -3691,7 +3571,6 @@@ int perf_event_read_local(struct perf_e
  {
        unsigned long flags;
        int ret = 0;
 -      u64 now;
  
        /*
         * Disabling interrupts avoids all counter scheduling (context
                goto out;
        }
  
 -      now = event->shadow_ctx_time + perf_clock();
 -      if (enabled)
 -              *enabled = now - event->tstamp_enabled;
        /*
         * If the event is currently on this CPU, its either a per-task event,
         * or local to this CPU. Furthermore it means its ACTIVE (otherwise
         * oncpu == -1).
         */
 -      if (event->oncpu == smp_processor_id()) {
 +      if (event->oncpu == smp_processor_id())
                event->pmu->read(event);
 -              if (running)
 -                      *running = now - event->tstamp_running;
 -      } else if (running) {
 -              *running = event->total_time_running;
 -      }
  
        *value = local64_read(&event->count);
 +      if (enabled || running) {
 +              u64 now = event->shadow_ctx_time + perf_clock();
 +              u64 __enabled, __running;
 +
 +              __perf_update_times(event, now, &__enabled, &__running);
 +              if (enabled)
 +                      *enabled = __enabled;
 +              if (running)
 +                      *running = __running;
 +      }
  out:
        local_irq_restore(flags);
  
  
  static int perf_event_read(struct perf_event *event, bool group)
  {
 +      enum perf_event_state state = READ_ONCE(event->state);
        int event_cpu, ret = 0;
  
        /*
         * If event is enabled and currently active on a CPU, update the
         * value in the event structure:
         */
 -      if (event->state == PERF_EVENT_STATE_ACTIVE) {
 -              struct perf_read_data data = {
 -                      .event = event,
 -                      .group = group,
 -                      .ret = 0,
 -              };
 +again:
 +      if (state == PERF_EVENT_STATE_ACTIVE) {
 +              struct perf_read_data data;
 +
 +              /*
 +               * Orders the ->state and ->oncpu loads such that if we see
 +               * ACTIVE we must also see the right ->oncpu.
 +               *
 +               * Matches the smp_wmb() from event_sched_in().
 +               */
 +              smp_rmb();
  
                event_cpu = READ_ONCE(event->oncpu);
                if ((unsigned)event_cpu >= nr_cpu_ids)
                        return 0;
  
 +              data = (struct perf_read_data){
 +                      .event = event,
 +                      .group = group,
 +                      .ret = 0,
 +              };
 +
                preempt_disable();
                event_cpu = __perf_event_read_cpu(event, event_cpu);
  
                (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
                preempt_enable();
                ret = data.ret;
 -      } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 +
 +      } else if (state == PERF_EVENT_STATE_INACTIVE) {
                struct perf_event_context *ctx = event->ctx;
                unsigned long flags;
  
                raw_spin_lock_irqsave(&ctx->lock, flags);
 +              state = event->state;
 +              if (state != PERF_EVENT_STATE_INACTIVE) {
 +                      raw_spin_unlock_irqrestore(&ctx->lock, flags);
 +                      goto again;
 +              }
 +
                /*
 -               * may read while context is not active
 -               * (e.g., thread is blocked), in that case
 -               * we cannot update context time
 +               * May read while context is not active (e.g., thread is
 +               * blocked), in that case we cannot update context time
                 */
 -              if (ctx->is_active) {
 +              if (ctx->is_active & EVENT_TIME) {
                        update_context_time(ctx);
                        update_cgrp_time_from_event(event);
                }
 +
 +              perf_event_update_time(event);
                if (group)
 -                      update_group_times(event);
 -              else
 -                      update_event_times(event);
 +                      perf_event_update_sibling_time(event);
                raw_spin_unlock_irqrestore(&ctx->lock, flags);
        }
  
@@@ -4143,7 -4243,7 +4142,7 @@@ static void perf_remove_from_owner(stru
         * indeed free this event, otherwise we need to serialize on
         * owner->perf_event_mutex.
         */
 -      owner = lockless_dereference(event->owner);
 +      owner = READ_ONCE(event->owner);
        if (owner) {
                /*
                 * Since delayed_put_task_struct() also drops the last
@@@ -4240,7 -4340,7 +4239,7 @@@ again
                 * Cannot change, child events are not migrated, see the
                 * comment with perf_event_ctx_lock_nested().
                 */
 -              ctx = lockless_dereference(child->ctx);
 +              ctx = READ_ONCE(child->ctx);
                /*
                 * Since child_mutex nests inside ctx::mutex, we must jump
                 * through hoops. We start by grabbing a reference on the ctx.
@@@ -4300,7 -4400,7 +4299,7 @@@ static int perf_release(struct inode *i
        return 0;
  }
  
 -u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 +static u64 __perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
  {
        struct perf_event *child;
        u64 total = 0;
  
        return total;
  }
 +
 +u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 +{
 +      struct perf_event_context *ctx;
 +      u64 count;
 +
 +      ctx = perf_event_ctx_lock(event);
 +      count = __perf_event_read_value(event, enabled, running);
 +      perf_event_ctx_unlock(event, ctx);
 +
 +      return count;
 +}
  EXPORT_SYMBOL_GPL(perf_event_read_value);
  
  static int __perf_read_group_add(struct perf_event *leader,
        if (ret)
                return ret;
  
 +      raw_spin_lock_irqsave(&ctx->lock, flags);
 +
        /*
         * Since we co-schedule groups, {enabled,running} times of siblings
         * will be identical to those of the leader, so we only publish one
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(leader);
  
 -      raw_spin_lock_irqsave(&ctx->lock, flags);
 -
        list_for_each_entry(sub, &leader->sibling_list, group_entry) {
                values[n++] += perf_event_count(sub);
                if (read_format & PERF_FORMAT_ID)
@@@ -4442,7 -4530,7 +4441,7 @@@ static int perf_read_one(struct perf_ev
        u64 values[4];
        int n = 0;
  
 -      values[n++] = perf_event_read_value(event, &enabled, &running);
 +      values[n++] = __perf_event_read_value(event, &enabled, &running);
        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
                values[n++] = enabled;
        if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
@@@ -4821,7 -4909,8 +4820,7 @@@ static void calc_timer_values(struct pe
  
        *now = perf_clock();
        ctx_time = event->shadow_ctx_time + *now;
 -      *enabled = ctx_time - event->tstamp_enabled;
 -      *running = ctx_time - event->tstamp_running;
 +      __perf_update_times(event, ctx_time, enabled, running);
  }
  
  static void perf_event_init_userpage(struct perf_event *event)
@@@ -5225,8 -5314,8 +5224,8 @@@ static int perf_mmap(struct file *file
                if (!rb)
                        goto aux_unlock;
  
 -              aux_offset = ACCESS_ONCE(rb->user_page->aux_offset);
 -              aux_size = ACCESS_ONCE(rb->user_page->aux_size);
 +              aux_offset = READ_ONCE(rb->user_page->aux_offset);
 +              aux_size = READ_ONCE(rb->user_page->aux_size);
  
                if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
                        goto aux_unlock;
@@@ -7867,11 -7956,9 +7866,9 @@@ void perf_trace_run_bpf_submit(void *ra
                               struct pt_regs *regs, struct hlist_head *head,
                               struct task_struct *task)
  {
-       struct bpf_prog *prog = call->prog;
-       if (prog) {
+       if (bpf_prog_array_valid(call)) {
                *(struct pt_regs **)raw_data = regs;
-               if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
+               if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
                        perf_swevent_put_recursion_context(rctx);
                        return;
                }
@@@ -8060,13 -8147,11 +8057,11 @@@ static int perf_event_set_bpf_prog(stru
  {
        bool is_kprobe, is_tracepoint, is_syscall_tp;
        struct bpf_prog *prog;
+       int ret;
  
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return perf_event_set_bpf_handler(event, prog_fd);
  
-       if (event->tp_event->prog)
-               return -EEXIST;
        is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
        is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
        is_syscall_tp = is_syscall_trace_event(event->tp_event);
                        return -EACCES;
                }
        }
-       event->tp_event->prog = prog;
-       event->tp_event->bpf_prog_owner = event;
  
-       return 0;
+       ret = perf_event_attach_bpf_prog(event, prog);
+       if (ret)
+               bpf_prog_put(prog);
+       return ret;
  }
  
  static void perf_event_free_bpf_prog(struct perf_event *event)
  {
-       struct bpf_prog *prog;
-       perf_event_free_bpf_handler(event);
-       if (!event->tp_event)
+       if (event->attr.type != PERF_TYPE_TRACEPOINT) {
+               perf_event_free_bpf_handler(event);
                return;
-       prog = event->tp_event->prog;
-       if (prog && event->tp_event->bpf_prog_owner == event) {
-               event->tp_event->prog = NULL;
-               bpf_prog_put(prog);
        }
+       perf_event_detach_bpf_prog(event);
  }
  
  #else
@@@ -9326,11 -9405,6 +9315,11 @@@ static void account_event(struct perf_e
                inc = true;
  
        if (inc) {
 +              /*
 +               * We need the mutex here because static_branch_enable()
 +               * must complete *before* the perf_sched_count increment
 +               * becomes visible.
 +               */
                if (atomic_inc_not_zero(&perf_sched_count))
                        goto enabled;
  
@@@ -10456,7 -10530,7 +10445,7 @@@ perf_event_exit_event(struct perf_even
        if (parent_event)
                perf_group_detach(child_event);
        list_del_event(child_event, child_ctx);
 -      child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
 +      perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
        raw_spin_unlock_irq(&child_ctx->lock);
  
        /*
@@@ -10694,7 -10768,7 +10683,7 @@@ inherit_event(struct perf_event *parent
              struct perf_event *group_leader,
              struct perf_event_context *child_ctx)
  {
 -      enum perf_event_active_state parent_state = parent_event->state;
 +      enum perf_event_state parent_state = parent_event->state;
        struct perf_event *child_event;
        unsigned long flags;
  
@@@ -11030,7 -11104,6 +11019,7 @@@ static void __perf_event_exit_context(v
        struct perf_event *event;
  
        raw_spin_lock(&ctx->lock);
 +      ctx_sched_out(ctx, cpuctx, EVENT_TIME);
        list_for_each_entry(event, &ctx->event_list, event_entry)
                __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
        raw_spin_unlock(&ctx->lock);
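The perf_event_read() hunk above re-reads event->state under ctx->lock and jumps back to the top if it lost a race, and pairs an smp_rmb() with the smp_wmb() in event_sched_in() so that an ACTIVE state implies a valid ->oncpu. A minimal sketch of that lock-and-recheck retry pattern, with hypothetical names (o, OBJ_INACTIVE and update_times_locked are illustrative, not perf symbols):

	unsigned long flags;
	enum obj_state state;

again:
	state = READ_ONCE(o->state);
	if (state == OBJ_INACTIVE) {
		raw_spin_lock_irqsave(&o->lock, flags);
		if (o->state != OBJ_INACTIVE) {
			/* raced with a concurrent state change; restart */
			raw_spin_unlock_irqrestore(&o->lock, flags);
			goto again;
		}
		/* o->state is stable while the lock is held */
		update_times_locked(o);
		raw_spin_unlock_irqrestore(&o->lock, flags);
	}
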
diff --combined lib/dynamic_queue_limits.c
index da4672a50a54a2046bb86479c57dc11552a1981c,8dbfdf6445f8aa4f9d452b267ef6a6d00e375906..e659a027036ece3714d3261deb252c9d6008ec83
@@@ -21,7 -21,7 +21,7 @@@ void dql_completed(struct dql *dql, uns
        unsigned int ovlimit, completed, num_queued;
        bool all_prev_completed;
  
 -      num_queued = ACCESS_ONCE(dql->num_queued);
 +      num_queued = READ_ONCE(dql->num_queued);
  
        /* Can't complete more than what's in queue */
        BUG_ON(count > num_queued - dql->num_completed);
@@@ -128,12 -128,11 +128,11 @@@ void dql_reset(struct dql *dql
  }
  EXPORT_SYMBOL(dql_reset);
  
- int dql_init(struct dql *dql, unsigned hold_time)
+ void dql_init(struct dql *dql, unsigned int hold_time)
  {
        dql->max_limit = DQL_MAX_LIMIT;
        dql->min_limit = 0;
        dql->slack_hold_time = hold_time;
        dql_reset(dql);
-       return 0;
  }
  EXPORT_SYMBOL(dql_init);
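dql_init() always returned 0 and its callers ignore the result, so the net-next side makes it return void. As a hedged illustration (assuming the usual BQL call site in netdev_init_one_queue(), which is not shown in this diff), the call simply stays a bare statement:

#ifdef CONFIG_BQL
	dql_init(&queue->dql, HZ);	/* nothing to check any more */
#endif
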
diff --combined net/atm/mpc.c
index 63138c8c2269cd190b9f8d50cf2d179ee63682f8,883d25778fa47dcadf64b90f451c918ca8bc45fe..e882d8b5db05e889be00fe26a0595458ead470a4
@@@ -95,7 -95,7 +95,7 @@@ static netdev_tx_t mpc_send_packet(stru
  static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
                               unsigned long event, void *dev);
  static void mpc_timer_refresh(void);
- static void mpc_cache_check(unsigned long checking_time);
+ static void mpc_cache_check(struct timer_list *unused);
  
  static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
        0xaa, 0xaa, 0x03,
@@@ -121,7 -121,7 +121,7 @@@ static struct notifier_block mpoa_notif
  
  struct mpoa_client *mpcs = NULL; /* FIXME */
  static struct atm_mpoa_qos *qos_head = NULL;
 -static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
 +static DEFINE_TIMER(mpc_timer, NULL);
  
  
  static struct mpoa_client *find_mpc_by_itfnum(int itf)
@@@ -799,7 -799,6 +799,6 @@@ static int atm_mpoa_mpoad_attach(struc
        int err;
  
        if (mpcs == NULL) {
-               init_timer(&mpc_timer);
                mpc_timer_refresh();
  
                /* This lets us now how our LECs are doing */
@@@ -1408,15 -1407,17 +1407,17 @@@ static void clean_up(struct k_message *
        msg_to_mpoad(msg, mpc);
  }
  
+ static unsigned long checking_time;
  static void mpc_timer_refresh(void)
  {
        mpc_timer.expires = jiffies + (MPC_P2 * HZ);
-       mpc_timer.data = mpc_timer.expires;
-       mpc_timer.function = mpc_cache_check;
+       checking_time = mpc_timer.expires;
+       mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check;
        add_timer(&mpc_timer);
  }
  
- static void mpc_cache_check(unsigned long checking_time)
+ static void mpc_cache_check(struct timer_list *unused)
  {
        struct mpoa_client *mpc = mpcs;
        static unsigned long previous_resolving_check_time;
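The mpc.c hunks above, like the inet_fragment.c and dn_route.c ones further down, belong to the tree-wide timer conversion: callbacks now take the struct timer_list pointer itself instead of an unsigned long cookie, and setup_timer()/init_timer() give way to timer_setup(). The (TIMER_FUNC_TYPE) cast seen here is the transitional shim used while DEFINE_TIMER() users are still being converted. A minimal sketch of the new-style pattern with made-up names (struct foo, foo_timeout and foo_start are illustrative only):

#include <linux/timer.h>
#include <linux/jiffies.h>

struct foo {
	struct timer_list timer;
	unsigned long last_poke;
};

/* new-style callback: the timer itself is the argument */
static void foo_timeout(struct timer_list *t)
{
	struct foo *f = from_timer(f, t, timer);	/* container_of() wrapper */

	f->last_poke = jiffies;
	mod_timer(&f->timer, jiffies + HZ);		/* re-arm */
}

static void foo_start(struct foo *f)
{
	timer_setup(&f->timer, foo_timeout, 0);		/* replaces setup_timer() */
	mod_timer(&f->timer, jiffies + HZ);
}
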
diff --combined net/core/dev.c
index 61559ca3980b8d25d5faf6106d52717b16cc4cd3,ad5f90dacd92b9bee1e1f6b209f1d4999954b002..8ee29f4f5fa91894e63734cfee3ee6909fd21b26
  #include <linux/crash_dump.h>
  #include <linux/sctp.h>
  #include <net/udp_tunnel.h>
+ #include <linux/net_namespace.h>
  
  #include "net-sysfs.h"
  
@@@ -162,7 -163,6 +163,6 @@@ static struct list_head offload_base __
  
  static int netif_rx_internal(struct sk_buff *skb);
  static int call_netdevice_notifiers_info(unsigned long val,
-                                        struct net_device *dev,
                                         struct netdev_notifier_info *info);
  static struct napi_struct *napi_by_id(unsigned int napi_id);
  
  DEFINE_RWLOCK(dev_base_lock);
  EXPORT_SYMBOL(dev_base_lock);
  
+ static DEFINE_MUTEX(ifalias_mutex);
  /* protects napi_hash addition/deletion and napi_gen_id */
  static DEFINE_SPINLOCK(napi_hash_lock);
  
@@@ -1062,7 -1064,10 +1064,10 @@@ static int __dev_alloc_name(struct net 
        unsigned long *inuse;
        struct net_device *d;
  
-       p = strnchr(name, IFNAMSIZ-1, '%');
+       if (!dev_valid_name(name))
+               return -EINVAL;
+       p = strchr(name, '%');
        if (p) {
                /*
                 * Verify the string as this thing may have come from
                free_page((unsigned long) inuse);
        }
  
-       if (buf != name)
-               snprintf(buf, IFNAMSIZ, name, i);
+       snprintf(buf, IFNAMSIZ, name, i);
        if (!__dev_get_by_name(net, buf))
                return i;
  
         * when the name is long and there isn't enough space left
         * for the digits, or if all bits are used.
         */
-       return -ENFILE;
+       return p ? -ENFILE : -EEXIST;
+ }
+ static int dev_alloc_name_ns(struct net *net,
+                            struct net_device *dev,
+                            const char *name)
+ {
+       char buf[IFNAMSIZ];
+       int ret;
+       BUG_ON(!net);
+       ret = __dev_alloc_name(net, name, buf);
+       if (ret >= 0)
+               strlcpy(dev->name, buf, IFNAMSIZ);
+       return ret;
  }
  
  /**
  
  int dev_alloc_name(struct net_device *dev, const char *name)
  {
-       char buf[IFNAMSIZ];
-       struct net *net;
-       int ret;
-       BUG_ON(!dev_net(dev));
-       net = dev_net(dev);
-       ret = __dev_alloc_name(net, name, buf);
-       if (ret >= 0)
-               strlcpy(dev->name, buf, IFNAMSIZ);
-       return ret;
+       return dev_alloc_name_ns(dev_net(dev), dev, name);
  }
  EXPORT_SYMBOL(dev_alloc_name);
  
- static int dev_alloc_name_ns(struct net *net,
-                            struct net_device *dev,
-                            const char *name)
- {
-       char buf[IFNAMSIZ];
-       int ret;
-       ret = __dev_alloc_name(net, name, buf);
-       if (ret >= 0)
-               strlcpy(dev->name, buf, IFNAMSIZ);
-       return ret;
- }
  int dev_get_valid_name(struct net *net, struct net_device *dev,
                       const char *name)
  {
-       BUG_ON(!net);
-       if (!dev_valid_name(name))
-               return -EINVAL;
-       if (strchr(name, '%'))
-               return dev_alloc_name_ns(net, dev, name);
-       else if (__dev_get_by_name(net, name))
-               return -EEXIST;
-       else if (dev->name != name)
-               strlcpy(dev->name, name, IFNAMSIZ);
-       return 0;
+       return dev_alloc_name_ns(net, dev, name);
  }
  EXPORT_SYMBOL(dev_get_valid_name);
  
@@@ -1265,29 -1249,53 +1249,53 @@@ rollback
   */
  int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
  {
-       char *new_ifalias;
-       ASSERT_RTNL();
+       struct dev_ifalias *new_alias = NULL;
  
        if (len >= IFALIASZ)
                return -EINVAL;
  
-       if (!len) {
-               kfree(dev->ifalias);
-               dev->ifalias = NULL;
-               return 0;
+       if (len) {
+               new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+               if (!new_alias)
+                       return -ENOMEM;
+               memcpy(new_alias->ifalias, alias, len);
+               new_alias->ifalias[len] = 0;
        }
  
-       new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
-       if (!new_ifalias)
-               return -ENOMEM;
-       dev->ifalias = new_ifalias;
-       memcpy(dev->ifalias, alias, len);
-       dev->ifalias[len] = 0;
+       mutex_lock(&ifalias_mutex);
+       rcu_swap_protected(dev->ifalias, new_alias,
+                          mutex_is_locked(&ifalias_mutex));
+       mutex_unlock(&ifalias_mutex);
+       if (new_alias)
+               kfree_rcu(new_alias, rcuhead);
  
        return len;
  }
  
+ /**
+  *    dev_get_alias - get ifalias of a device
+  *    @dev: device
+  *    @name: buffer to store name of ifalias
+  *    @len: size of buffer
+  *
+  *    get ifalias for a device.  Caller must make sure dev cannot go
+  *    away,  e.g. rcu read lock or own a reference count to device.
+  */
+ int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+ {
+       const struct dev_ifalias *alias;
+       int ret = 0;
+       rcu_read_lock();
+       alias = rcu_dereference(dev->ifalias);
+       if (alias)
+               ret = snprintf(name, len, "%s", alias->ifalias);
+       rcu_read_unlock();
+       return ret;
+ }
  
  /**
   *    netdev_features_change - device changes features
@@@ -1312,10 -1320,11 +1320,11 @@@ EXPORT_SYMBOL(netdev_features_change)
  void netdev_state_change(struct net_device *dev)
  {
        if (dev->flags & IFF_UP) {
-               struct netdev_notifier_change_info change_info;
+               struct netdev_notifier_change_info change_info = {
+                       .info.dev = dev,
+               };
  
-               change_info.flags_changed = 0;
-               call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+               call_netdevice_notifiers_info(NETDEV_CHANGE,
                                              &change_info.info);
                rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
        }
@@@ -1536,9 -1545,10 +1545,10 @@@ EXPORT_SYMBOL(dev_disable_lro)
  static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
                                   struct net_device *dev)
  {
-       struct netdev_notifier_info info;
+       struct netdev_notifier_info info = {
+               .dev = dev,
+       };
  
-       netdev_notifier_info_init(&info, dev);
        return nb->notifier_call(nb, val, &info);
  }
  
@@@ -1663,11 -1673,9 +1673,9 @@@ EXPORT_SYMBOL(unregister_netdevice_noti
   */
  
  static int call_netdevice_notifiers_info(unsigned long val,
-                                        struct net_device *dev,
                                         struct netdev_notifier_info *info)
  {
        ASSERT_RTNL();
-       netdev_notifier_info_init(info, dev);
        return raw_notifier_call_chain(&netdev_chain, val, info);
  }
  
  
  int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
  {
-       struct netdev_notifier_info info;
+       struct netdev_notifier_info info = {
+               .dev = dev,
+       };
  
-       return call_netdevice_notifiers_info(val, dev, &info);
+       return call_netdevice_notifiers_info(val, &info);
  }
  EXPORT_SYMBOL(call_netdevice_notifiers);
  
@@@ -2012,6 -2022,7 +2022,7 @@@ int netdev_txq_to_tc(struct net_device 
  
        return 0;
  }
+ EXPORT_SYMBOL(netdev_txq_to_tc);
  
  #ifdef CONFIG_XPS
  static DEFINE_MUTEX(xps_map_mutex);
@@@ -3245,22 -3256,22 +3256,22 @@@ EXPORT_SYMBOL(dev_loopback_xmit)
  static struct sk_buff *
  sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
  {
-       struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+       struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
        struct tcf_result cl_res;
  
-       if (!cl)
+       if (!miniq)
                return skb;
  
        /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-       qdisc_bstats_cpu_update(cl->q, skb);
+       mini_qdisc_bstats_cpu_update(miniq, skb);
  
-       switch (tcf_classify(skb, cl, &cl_res, false)) {
+       switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
        case TC_ACT_OK:
        case TC_ACT_RECLASSIFY:
                skb->tc_index = TC_H_MIN(cl_res.classid);
                break;
        case TC_ACT_SHOT:
-               qdisc_qstats_cpu_drop(cl->q);
+               mini_qdisc_qstats_cpu_drop(miniq);
                *ret = NET_XMIT_DROP;
                kfree_skb(skb);
                return NULL;
@@@ -3725,7 -3736,7 +3736,7 @@@ bool rps_may_expire_flow(struct net_dev
        flow_table = rcu_dereference(rxqueue->rps_flow_table);
        if (flow_table && flow_id <= flow_table->mask) {
                rflow = &flow_table->flows[flow_id];
 -              cpu = ACCESS_ONCE(rflow->cpu);
 +              cpu = READ_ONCE(rflow->cpu);
                if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
                    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
                           rflow->last_qtail) <
@@@ -3864,8 -3875,8 +3875,8 @@@ drop
  static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                                     struct bpf_prog *xdp_prog)
  {
+       u32 metalen, act = XDP_DROP;
        struct xdp_buff xdp;
-       u32 act = XDP_DROP;
        void *orig_data;
        int hlen, off;
        u32 mac_len;
        if (skb_cloned(skb))
                return XDP_PASS;
  
-       if (skb_linearize(skb))
-               goto do_drop;
+       /* XDP packets must be linear and must have sufficient headroom
+        * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+        * native XDP provides, thus we need to do it here as well.
+        */
+       if (skb_is_nonlinear(skb) ||
+           skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+               int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+               int troom = skb->tail + skb->data_len - skb->end;
+               /* In case we have to go down the path and also linearize,
+                * then lets do the pskb_expand_head() work just once here.
+                */
+               if (pskb_expand_head(skb,
+                                    hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+                                    troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+                       goto do_drop;
+               if (troom > 0 && __skb_linearize(skb))
+                       goto do_drop;
+       }
  
        /* The XDP program wants to see the packet starting at the MAC
         * header.
        mac_len = skb->data - skb_mac_header(skb);
        hlen = skb_headlen(skb) + mac_len;
        xdp.data = skb->data - mac_len;
+       xdp.data_meta = xdp.data;
        xdp.data_end = xdp.data + hlen;
        xdp.data_hard_start = skb->data - skb_headroom(skb);
        orig_data = xdp.data;
        case XDP_REDIRECT:
        case XDP_TX:
                __skb_push(skb, mac_len);
-               /* fall through */
+               break;
        case XDP_PASS:
+               metalen = xdp.data - xdp.data_meta;
+               if (metalen)
+                       skb_metadata_set(skb, metalen);
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                /* fall through */
@@@ -4140,7 -4171,7 +4171,7 @@@ sch_handle_ingress(struct sk_buff *skb
                   struct net_device *orig_dev)
  {
  #ifdef CONFIG_NET_CLS_ACT
-       struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+       struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
        struct tcf_result cl_res;
  
        /* If there's at least one ingress present somewhere (so
         * that are not configured with an ingress qdisc will bail
         * out here.
         */
-       if (!cl)
+       if (!miniq)
                return skb;
        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
  
        qdisc_skb_cb(skb)->pkt_len = skb->len;
        skb->tc_at_ingress = 1;
-       qdisc_bstats_cpu_update(cl->q, skb);
+       mini_qdisc_bstats_cpu_update(miniq, skb);
  
-       switch (tcf_classify(skb, cl, &cl_res, false)) {
+       switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
        case TC_ACT_OK:
        case TC_ACT_RECLASSIFY:
                skb->tc_index = TC_H_MIN(cl_res.classid);
                break;
        case TC_ACT_SHOT:
-               qdisc_qstats_cpu_drop(cl->q);
+               mini_qdisc_qstats_cpu_drop(miniq);
                kfree_skb(skb);
                return NULL;
        case TC_ACT_STOLEN:
        return ret;
  }
  
+ /**
+  *    netif_receive_skb_core - special purpose version of netif_receive_skb
+  *    @skb: buffer to process
+  *
+  *    More direct receive version of netif_receive_skb().  It should
+  *    only be used by callers that have a need to skip RPS and Generic XDP.
+  *    Caller must also take care of handling if (page_is_)pfmemalloc.
+  *
+  *    This function may only be called from softirq context and interrupts
+  *    should be enabled.
+  *
+  *    Return values (usually ignored):
+  *    NET_RX_SUCCESS: no congestion
+  *    NET_RX_DROP: packet was dropped
+  */
+ int netif_receive_skb_core(struct sk_buff *skb)
+ {
+       int ret;
+       rcu_read_lock();
+       ret = __netif_receive_skb_core(skb, false);
+       rcu_read_unlock();
+       return ret;
+ }
+ EXPORT_SYMBOL(netif_receive_skb_core);
  static int __netif_receive_skb(struct sk_buff *skb)
  {
        int ret;
        return ret;
  }
  
- static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
  {
        struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
        struct bpf_prog *new = xdp->prog;
@@@ -4695,6 -4754,7 +4754,7 @@@ static void gro_list_prepare(struct nap
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                diffs |= p->vlan_tci ^ skb->vlan_tci;
                diffs |= skb_metadata_dst_cmp(p, skb);
+               diffs |= skb_metadata_differs(p, skb);
                if (maclen == ETH_HLEN)
                        diffs |= compare_ether_header(skb_mac_header(p),
                                                      skb_mac_header(skb));
@@@ -6228,9 -6288,19 +6288,19 @@@ static void __netdev_adjacent_dev_unlin
  
  static int __netdev_upper_dev_link(struct net_device *dev,
                                   struct net_device *upper_dev, bool master,
-                                  void *upper_priv, void *upper_info)
- {
-       struct netdev_notifier_changeupper_info changeupper_info;
+                                  void *upper_priv, void *upper_info,
+                                  struct netlink_ext_ack *extack)
+ {
+       struct netdev_notifier_changeupper_info changeupper_info = {
+               .info = {
+                       .dev = dev,
+                       .extack = extack,
+               },
+               .upper_dev = upper_dev,
+               .master = master,
+               .linking = true,
+               .upper_info = upper_info,
+       };
        int ret = 0;
  
        ASSERT_RTNL();
        if (master && netdev_master_upper_dev_get(dev))
                return -EBUSY;
  
-       changeupper_info.upper_dev = upper_dev;
-       changeupper_info.master = master;
-       changeupper_info.linking = true;
-       changeupper_info.upper_info = upper_info;
-       ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+       ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
                                            &changeupper_info.info);
        ret = notifier_to_errno(ret);
        if (ret)
        if (ret)
                return ret;
  
-       ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+       ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
                                            &changeupper_info.info);
        ret = notifier_to_errno(ret);
        if (ret)
@@@ -6289,9 -6354,11 +6354,11 @@@ rollback
   * returns zero.
   */
  int netdev_upper_dev_link(struct net_device *dev,
-                         struct net_device *upper_dev)
+                         struct net_device *upper_dev,
+                         struct netlink_ext_ack *extack)
  {
-       return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+       return __netdev_upper_dev_link(dev, upper_dev, false,
+                                      NULL, NULL, extack);
  }
  EXPORT_SYMBOL(netdev_upper_dev_link);
  
   */
  int netdev_master_upper_dev_link(struct net_device *dev,
                                 struct net_device *upper_dev,
-                                void *upper_priv, void *upper_info)
+                                void *upper_priv, void *upper_info,
+                                struct netlink_ext_ack *extack)
  {
        return __netdev_upper_dev_link(dev, upper_dev, true,
-                                      upper_priv, upper_info);
+                                      upper_priv, upper_info, extack);
  }
  EXPORT_SYMBOL(netdev_master_upper_dev_link);
  
  void netdev_upper_dev_unlink(struct net_device *dev,
                             struct net_device *upper_dev)
  {
-       struct netdev_notifier_changeupper_info changeupper_info;
+       struct netdev_notifier_changeupper_info changeupper_info = {
+               .info = {
+                       .dev = dev,
+               },
+               .upper_dev = upper_dev,
+               .linking = false,
+       };
  
        ASSERT_RTNL();
  
-       changeupper_info.upper_dev = upper_dev;
        changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
-       changeupper_info.linking = false;
  
-       call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+       call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
                                      &changeupper_info.info);
  
        __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
  
-       call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+       call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
                                      &changeupper_info.info);
  }
  EXPORT_SYMBOL(netdev_upper_dev_unlink);
  void netdev_bonding_info_change(struct net_device *dev,
                                struct netdev_bonding_info *bonding_info)
  {
-       struct netdev_notifier_bonding_info     info;
+       struct netdev_notifier_bonding_info info = {
+               .info.dev = dev,
+       };
  
        memcpy(&info.bonding_info, bonding_info,
               sizeof(struct netdev_bonding_info));
-       call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+       call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
                                      &info.info);
  }
  EXPORT_SYMBOL(netdev_bonding_info_change);
@@@ -6487,11 -6561,13 +6561,13 @@@ EXPORT_SYMBOL(dev_get_nest_level)
  void netdev_lower_state_changed(struct net_device *lower_dev,
                                void *lower_state_info)
  {
-       struct netdev_notifier_changelowerstate_info changelowerstate_info;
+       struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+               .info.dev = lower_dev,
+       };
  
        ASSERT_RTNL();
        changelowerstate_info.lower_state_info = lower_state_info;
-       call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+       call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
                                      &changelowerstate_info.info);
  }
  EXPORT_SYMBOL(netdev_lower_state_changed);
@@@ -6782,11 -6858,14 +6858,14 @@@ void __dev_notify_flags(struct net_devi
  
        if (dev->flags & IFF_UP &&
            (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
-               struct netdev_notifier_change_info change_info;
+               struct netdev_notifier_change_info change_info = {
+                       .info = {
+                               .dev = dev,
+                       },
+                       .flags_changed = changes,
+               };
  
-               change_info.flags_changed = changes;
-               call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
-                                             &change_info.info);
+               call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
        }
  }
  
@@@ -6993,26 -7072,26 +7072,26 @@@ int dev_change_proto_down(struct net_de
  }
  EXPORT_SYMBOL(dev_change_proto_down);
  
- u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+ u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
  {
-       struct netdev_xdp xdp;
+       struct netdev_bpf xdp;
  
        memset(&xdp, 0, sizeof(xdp));
        xdp.command = XDP_QUERY_PROG;
  
        /* Query must always succeed. */
-       WARN_ON(xdp_op(dev, &xdp) < 0);
+       WARN_ON(bpf_op(dev, &xdp) < 0);
        if (prog_id)
                *prog_id = xdp.prog_id;
  
        return xdp.prog_attached;
  }
  
- static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
                           struct netlink_ext_ack *extack, u32 flags,
                           struct bpf_prog *prog)
  {
-       struct netdev_xdp xdp;
+       struct netdev_bpf xdp;
  
        memset(&xdp, 0, sizeof(xdp));
        if (flags & XDP_FLAGS_HW_MODE)
        xdp.flags = flags;
        xdp.prog = prog;
  
-       return xdp_op(dev, &xdp);
+       return bpf_op(dev, &xdp);
  }
  
  /**
@@@ -7040,32 -7119,36 +7119,36 @@@ int dev_change_xdp_fd(struct net_devic
  {
        const struct net_device_ops *ops = dev->netdev_ops;
        struct bpf_prog *prog = NULL;
-       xdp_op_t xdp_op, xdp_chk;
+       bpf_op_t bpf_op, bpf_chk;
        int err;
  
        ASSERT_RTNL();
  
-       xdp_op = xdp_chk = ops->ndo_xdp;
-       if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+       bpf_op = bpf_chk = ops->ndo_bpf;
+       if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
                return -EOPNOTSUPP;
-       if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
-               xdp_op = generic_xdp_install;
-       if (xdp_op == xdp_chk)
-               xdp_chk = generic_xdp_install;
+       if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
+               bpf_op = generic_xdp_install;
+       if (bpf_op == bpf_chk)
+               bpf_chk = generic_xdp_install;
  
        if (fd >= 0) {
-               if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+               if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
                        return -EEXIST;
                if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-                   __dev_xdp_attached(dev, xdp_op, NULL))
+                   __dev_xdp_attached(dev, bpf_op, NULL))
                        return -EBUSY;
  
-               prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+               if (bpf_op == ops->ndo_bpf)
+                       prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+                                                    dev);
+               else
+                       prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
                if (IS_ERR(prog))
                        return PTR_ERR(prog);
        }
  
-       err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
+       err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
        if (err < 0 && prog)
                bpf_prog_put(prog);
  
@@@ -7157,7 -7240,7 +7240,7 @@@ static void rollback_registered_many(st
                if (!dev->rtnl_link_ops ||
                    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
                        skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
-                                                    GFP_KERNEL);
+                                                    GFP_KERNEL, NULL);
  
                /*
                 *      Flush the unicast and multicast chains
@@@ -7994,7 -8077,7 +8077,7 @@@ struct net_device *alloc_netdev_mqs(in
                unsigned int txqs, unsigned int rxqs)
  {
        struct net_device *dev;
-       size_t alloc_size;
+       unsigned int alloc_size;
        struct net_device *p;
  
        BUG_ON(strlen(name) >= sizeof(dev->name));
@@@ -8244,7 -8327,7 +8327,7 @@@ EXPORT_SYMBOL(unregister_netdev)
  
  int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
  {
-       int err;
+       int err, new_nsid;
  
        ASSERT_RTNL();
  
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
        rcu_barrier();
        call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-       rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+       if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+               new_nsid = peernet2id_alloc(dev_net(dev), net);
+       else
+               new_nsid = peernet2id(dev_net(dev), net);
+       rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
  
        /*
         *      Flush the unicast and multicast chains
@@@ -8562,6 -8649,8 +8649,8 @@@ static void __net_exit netdev_exit(stru
  {
        kfree(net->dev_name_head);
        kfree(net->dev_index_head);
+       if (net != &init_net)
+               WARN_ON_ONCE(!list_empty(&net->dev_base_head));
  }
  
  static struct pernet_operations __net_initdata netdev_net_ops = {
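The generic-XDP hunks above (xdp.data_meta, skb_metadata_set(), skb_metadata_differs()) wire the new metadata area into the skb path. A hedged sketch of an XDP program that fills that area; the helper and section attributes are spelled out by hand rather than assuming a particular helper header, and the program and value names are illustrative:

#include <linux/bpf.h>
#include <linux/types.h>

/* helper declared locally instead of relying on a specific bpf_helpers.h */
static int (*bpf_xdp_adjust_meta)(void *ctx, int delta) =
	(void *) BPF_FUNC_xdp_adjust_meta;

__attribute__((section("xdp"), used))
int xdp_set_meta(struct xdp_md *ctx)
{
	__u32 *meta;

	/* grow the meta area by 4 bytes, directly in front of the payload */
	if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
		return XDP_PASS;

	meta = (void *)(long)ctx->data_meta;
	if ((void *)(meta + 1) > (void *)(long)ctx->data)
		return XDP_PASS;		/* bounds check for the verifier */

	*meta = 0x42;	/* e.g. a flow mark consumed by a later tc/BPF stage */
	return XDP_PASS;
}

char _license[] __attribute__((section("license"), used)) = "GPL";

With the hunks above, a packet passed up with XDP_PASS carries this metadata into the skb, and GRO refuses to merge packets whose metadata differs.
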
diff --combined net/core/pktgen.c
index 3b2034f6d49d20a0df890d02ea30ebd05cdb87b4,40db0b7e37ac9a9dd358236aeecc56963813fbe3..f95a150862250be5704c31e443928542d18d848f
@@@ -2165,7 -2165,7 +2165,7 @@@ static void pktgen_setup_inject(struct 
                                                + pkt_dev->pkt_overhead;
                }
  
-               for (i = 0; i < IN6_ADDR_HSIZE; i++)
+               for (i = 0; i < sizeof(struct in6_addr); i++)
                        if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
                                set = 1;
                                break;
@@@ -2711,7 -2711,7 +2711,7 @@@ static inline __be16 build_tci(unsigne
  static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
                                int datalen)
  {
-       struct timeval timestamp;
+       struct timespec64 timestamp;
        struct pktgen_hdr *pgh;
  
        pgh = skb_put(skb, sizeof(*pgh));
                pgh->tv_sec = 0;
                pgh->tv_usec = 0;
        } else {
-               do_gettimeofday(&timestamp);
+               /*
+                * pgh->tv_sec wraps in y2106 when interpreted as unsigned
+                * as done by wireshark, or y2038 when interpreted as signed.
+                * This is probably harmless, but if anyone wants to improve
+                * it, we could introduce a variant that puts 64-bit nanoseconds
+                * into the respective header bytes.
+                * This would also be slightly faster to read.
+                */
+               ktime_get_real_ts64(&timestamp);
                pgh->tv_sec = htonl(timestamp.tv_sec);
-               pgh->tv_usec = htonl(timestamp.tv_usec);
+               pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
        }
  }
  
@@@ -3377,7 -3385,7 +3385,7 @@@ static void pktgen_wait_for_skb(struct 
  
  static void pktgen_xmit(struct pktgen_dev *pkt_dev)
  {
 -      unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
 +      unsigned int burst = READ_ONCE(pkt_dev->burst);
        struct net_device *odev = pkt_dev->odev;
        struct netdev_queue *txq;
        struct sk_buff *skb;
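For the wrap years cited in the new pktgen comment above: pgh->tv_sec holds 32 bits of seconds since 1970, and 2^31 s is roughly 68 years while 2^32 s is roughly 136 years, so a signed interpretation overflows around 1970 + 68 = 2038 and an unsigned one around 1970 + 136 = 2106, which is why switching to ktime_get_real_ts64() and truncating back to 32 bits is treated as harmless here.
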
diff --combined net/decnet/dn_route.c
index 6538632fbd0342d4fe22aca8dcd66401a150c7a4,bff5ab88cdbb4e0496223271fcd2798c3edc8395..b36dceab0dc12000a73e6fec63e28ffa98691f59
@@@ -131,7 -131,7 +131,7 @@@ static struct dn_rt_hash_bucket *dn_rt_
  static unsigned int dn_rt_hash_mask;
  
  static struct timer_list dn_route_timer;
 -static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0);
 +static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
  int decnet_dst_gc_interval = 2;
  
  static struct dst_ops dn_dst_ops = {
@@@ -338,7 -338,7 +338,7 @@@ static int dn_insert_route(struct dn_ro
                                           dn_rt_hash_table[hash].chain);
                        rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
  
-                       dst_use(&rth->dst, now);
+                       dst_hold_and_use(&rth->dst, now);
                        spin_unlock_bh(&dn_rt_hash_table[hash].lock);
  
                        dst_release_immediate(&rt->dst);
        rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
        rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
  
-       dst_use(&rt->dst, now);
+       dst_hold_and_use(&rt->dst, now);
        spin_unlock_bh(&dn_rt_hash_table[hash].lock);
        *rp = rt;
        return 0;
@@@ -1258,7 -1258,7 +1258,7 @@@ static int __dn_route_output_key(struc
                            (flp->flowidn_mark == rt->fld.flowidn_mark) &&
                            dn_is_output_route(rt) &&
                            (rt->fld.flowidn_oif == flp->flowidn_oif)) {
-                               dst_use(&rt->dst, jiffies);
+                               dst_hold_and_use(&rt->dst, jiffies);
                                rcu_read_unlock_bh();
                                *pprt = &rt->dst;
                                return 0;
@@@ -1535,7 -1535,7 +1535,7 @@@ static int dn_route_input(struct sk_buf
                    (rt->fld.flowidn_oif == 0) &&
                    (rt->fld.flowidn_mark == skb->mark) &&
                    (rt->fld.flowidn_iif == cb->iif)) {
-                       dst_use(&rt->dst, jiffies);
+                       dst_hold_and_use(&rt->dst, jiffies);
                        rcu_read_unlock();
                        skb_dst_set(skb, (struct dst_entry *)rt);
                        return 0;
diff --combined net/ipv4/inet_fragment.c
index f9597ba2659986408b3d43c4821e0b7793fa6670,7f3ef5c287a10d107577377db2718b676949b021..26a3d0315728ed2b16ca46080a3546668100bc8e
@@@ -147,7 -147,7 +147,7 @@@ inet_evict_bucket(struct inet_frags *f
        spin_unlock(&hb->chain_lock);
  
        hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
-               f->frag_expire((unsigned long) fq);
+               f->frag_expire(&fq->timer);
  
        return evicted;
  }
@@@ -164,7 -164,7 +164,7 @@@ static void inet_frag_worker(struct wor
  
        local_bh_disable();
  
 -      for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
 +      for (i = READ_ONCE(f->next_bucket); budget; --budget) {
                evicted += inet_evict_bucket(f, &f->hash[i]);
                i = (i + 1) & (INETFRAGS_HASHSZ - 1);
                if (evicted > INETFRAGS_EVICT_MAX)
@@@ -366,7 -366,7 +366,7 @@@ static struct inet_frag_queue *inet_fra
        f->constructor(q, arg);
        add_frag_mem_limit(nf, f->qsize);
  
-       setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+       timer_setup(&q->timer, f->frag_expire, 0);
        spin_lock_init(&q->lock);
        refcount_set(&q->refcnt, 1);
  
diff --combined net/ipv4/route.c
index c0864562083b58e8a9143e051eb62a8a4e723d4a,bc40bd4111969f640603dd7c5b04fdb7fdcd4afd..3b427757b1f8ecfee63c0f0667dfa0c38c1653ae
@@@ -495,7 -495,7 +495,7 @@@ u32 ip_idents_reserve(u32 hash, int seg
  {
        u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
        atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
 -      u32 old = ACCESS_ONCE(*p_tstamp);
 +      u32 old = READ_ONCE(*p_tstamp);
        u32 now = (u32)jiffies;
        u32 new, delta = 0;
  
@@@ -1250,7 -1250,7 +1250,7 @@@ static void set_class_tag(struct rtabl
  static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
  {
        unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
-       unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+       unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
                                    ip_rt_min_advmss);
  
        return min(advmss, IPV4_MAX_PMTU - header_size);
@@@ -3038,7 -3038,6 +3038,6 @@@ struct ip_rt_acct __percpu *ip_rt_acct 
  
  int __init ip_rt_init(void)
  {
-       int rc = 0;
        int cpu;
  
        ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
  #endif
        register_pernet_subsys(&rt_genid_ops);
        register_pernet_subsys(&ipv4_inetpeer_ops);
-       return rc;
+       return 0;
  }
  
  #ifdef CONFIG_SYSCTL
diff --combined net/ipv4/tcp_input.c
index 887585045b271af66600f1814ac9d3a601f38773,f0b572fe959ae5b7c47989bd724859d0794de31b..dabbf1d392fb98c4ec3ef42cc814383dde9304aa
  #include <linux/ipsec.h>
  #include <asm/unaligned.h>
  #include <linux/errqueue.h>
+ #include <trace/events/tcp.h>
+ #include <linux/static_key.h>
  
- int sysctl_tcp_fack __read_mostly;
- int sysctl_tcp_max_reordering __read_mostly = 300;
- int sysctl_tcp_dsack __read_mostly = 1;
- int sysctl_tcp_app_win __read_mostly = 31;
- int sysctl_tcp_adv_win_scale __read_mostly = 1;
- EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
- /* rfc5961 challenge ack rate limiting */
- int sysctl_tcp_challenge_ack_limit = 1000;
- int sysctl_tcp_stdurg __read_mostly;
- int sysctl_tcp_rfc1337 __read_mostly;
  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
- int sysctl_tcp_frto __read_mostly = 2;
- int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
- int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
- int sysctl_tcp_early_retrans __read_mostly = 3;
- int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
  
  #define FLAG_DATA             0x01 /* Incoming frame contained data.          */
  #define FLAG_WIN_UPDATE               0x02 /* Incoming ACK was a window update.       */
@@@ -335,7 -320,7 +320,7 @@@ static void tcp_sndbuf_expand(struct so
        sndmem *= nr_segs * per_mss;
  
        if (sk->sk_sndbuf < sndmem)
-               sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+               sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
  }
  
  /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@@ -368,8 -353,8 +353,8 @@@ static int __tcp_grow_window(const stru
  {
        struct tcp_sock *tp = tcp_sk(sk);
        /* Optimize this! */
-       int truesize = tcp_win_from_space(skb->truesize) >> 1;
-       int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+       int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+       int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
  
        while (tp->rcv_ssthresh <= window) {
                if (truesize <= skb->len)
@@@ -394,7 -379,7 +379,7 @@@ static void tcp_grow_window(struct soc
                /* Check #2. Increase window, if skb with such overhead
                 * will fit to rcvbuf in future.
                 */
-               if (tcp_win_from_space(skb->truesize) <= skb->len)
+               if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
                        incr = 2 * tp->advmss;
                else
                        incr = __tcp_grow_window(sk, skb);
@@@ -420,11 -405,11 +405,11 @@@ static void tcp_fixup_rcvbuf(struct soc
        /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
         * Allow enough cushion so that sender is not limited by our window
         */
-       if (sysctl_tcp_moderate_rcvbuf)
+       if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
                rcvmem <<= 2;
  
        if (sk->sk_rcvbuf < rcvmem)
-               sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+               sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
  }
  
  /* 4. Try to fixup all. It is made immediately after connection enters
   */
  void tcp_init_buffer_space(struct sock *sk)
  {
+       int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
        struct tcp_sock *tp = tcp_sk(sk);
        int maxwin;
  
        if (tp->window_clamp >= maxwin) {
                tp->window_clamp = maxwin;
  
-               if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+               if (tcp_app_win && maxwin > 4 * tp->advmss)
                        tp->window_clamp = max(maxwin -
-                                              (maxwin >> sysctl_tcp_app_win),
+                                              (maxwin >> tcp_app_win),
                                               4 * tp->advmss);
        }
  
        /* Force reservation of one segment. */
-       if (sysctl_tcp_app_win &&
+       if (tcp_app_win &&
            tp->window_clamp > 2 * tp->advmss &&
            tp->window_clamp + tp->advmss > maxwin)
                tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
@@@ -471,15 -457,16 +457,16 @@@ static void tcp_clamp_window(struct soc
  {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
  
        icsk->icsk_ack.quick = 0;
  
-       if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+       if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
            !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
            !tcp_under_memory_pressure(sk) &&
            sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
                sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-                                   sysctl_tcp_rmem[2]);
+                                   net->ipv4.sysctl_tcp_rmem[2]);
        }
        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
                tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@@ -610,7 -597,7 +597,7 @@@ void tcp_rcv_space_adjust(struct sock *
         * <prev RTT . ><current RTT .. ><next RTT .... >
         */
  
-       if (sysctl_tcp_moderate_rcvbuf &&
+       if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
            !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
                int rcvwin, rcvmem, rcvbuf;
  
                }
  
                rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
-               while (tcp_win_from_space(rcvmem) < tp->advmss)
+               while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
                        rcvmem += 128;
  
-               rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+               rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+                            sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
                if (rcvbuf > sk->sk_rcvbuf) {
                        sk->sk_rcvbuf = rcvbuf;
  
@@@ -781,15 -769,6 +769,6 @@@ static void tcp_rtt_estimator(struct so
        tp->srtt_us = max(1U, srtt);
  }
  
- /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
-  * Note: TCP stack does not yet implement pacing.
-  * FQ packet scheduler can be used to implement cheap but effective
-  * TCP pacing, to smooth the burst on large writes when packets
-  * in flight is significantly lower than cwnd (or rwin)
-  */
- int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
- int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
  static void tcp_update_pacing_rate(struct sock *sk)
  {
        const struct tcp_sock *tp = tcp_sk(sk);
         *       end of slow start and should slow down.
         */
        if (tp->snd_cwnd < tp->snd_ssthresh / 2)
-               rate *= sysctl_tcp_pacing_ss_ratio;
+               rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
        else
-               rate *= sysctl_tcp_pacing_ca_ratio;
+               rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
  
        rate *= max(tp->snd_cwnd, tp->packets_out);
  
        if (likely(tp->srtt_us))
                do_div(rate, tp->srtt_us);
  
 -      /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
 +      /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
         * without any lock. We want to make sure compiler wont store
         * intermediate values in this location.
         */
 -      ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
 -                                              sk->sk_max_pacing_rate);
 +      WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
 +                                           sk->sk_max_pacing_rate));
  }
  
  /* Calculate rto without backoff.  This is the second half of Van Jacobson's
@@@ -863,60 -842,46 +842,46 @@@ __u32 tcp_init_cwnd(const struct tcp_so
        return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
  }
  
- /*
-  * Packet counting of FACK is based on in-order assumptions, therefore TCP
-  * disables it when reordering is detected
-  */
- void tcp_disable_fack(struct tcp_sock *tp)
- {
-       /* RFC3517 uses different metric in lost marker => reset on change */
-       if (tcp_is_fack(tp))
-               tp->lost_skb_hint = NULL;
-       tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
- }
  /* Take note that the peer is sending D-SACKs */
  static void tcp_dsack_seen(struct tcp_sock *tp)
  {
        tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+       tp->rack.dsack_seen = 1;
  }
  
- static void tcp_update_reordering(struct sock *sk, const int metric,
-                                 const int ts)
+ /* It's reordering when higher sequence was delivered (i.e. sacked) before
+  * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+  * distance is approximated in full-mss packet distance ("reordering").
+  */
+ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+                                     const int ts)
  {
        struct tcp_sock *tp = tcp_sk(sk);
-       int mib_idx;
+       const u32 mss = tp->mss_cache;
+       u32 fack, metric;
  
-       if (WARN_ON_ONCE(metric < 0))
+       fack = tcp_highest_sack_seq(tp);
+       if (!before(low_seq, fack))
                return;
  
-       if (metric > tp->reordering) {
-               tp->reordering = min(sysctl_tcp_max_reordering, metric);
+       metric = fack - low_seq;
+       if ((metric > tp->reordering * mss) && mss) {
  #if FASTRETRANS_DEBUG > 1
                pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
                         tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
                         tp->reordering,
-                        tp->fackets_out,
+                        0,
                         tp->sacked_out,
                         tp->undo_marker ? tp->undo_retrans : 0);
  #endif
-               tcp_disable_fack(tp);
+               tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+                                      sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
        }
  
        tp->rack.reord = 1;
        /* This exciting event is worth remembering. 8) */
-       if (ts)
-               mib_idx = LINUX_MIB_TCPTSREORDER;
-       else if (tcp_is_reno(tp))
-               mib_idx = LINUX_MIB_TCPRENOREORDER;
-       else if (tcp_is_fack(tp))
-               mib_idx = LINUX_MIB_TCPFACKREORDER;
-       else
-               mib_idx = LINUX_MIB_TCPSACKREORDER;
-       NET_INC_STATS(sock_net(sk), mib_idx);
+       NET_INC_STATS(sock_net(sk),
+                     ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
  }
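
tcp_check_sack_reordering() above replaces the old fackets_out-based metric with a sequence-distance estimate: the gap between the lowest newly-delivered, never-retransmitted sequence and the highest SACKed sequence, converted to whole-MSS packets and capped by tcp_max_reordering. A self-contained sketch of that arithmetic; reordering_degree() is a hypothetical helper and ignores sequence wraparound, which the kernel handles with before().

#include <stdio.h>

static unsigned int reordering_degree(unsigned int low_seq, unsigned int fack,
				      unsigned int mss, unsigned int max_reord)
{
	unsigned int metric, pkts;

	if (!mss || fack <= low_seq)		/* no reordering observed */
		return 0;
	metric = fack - low_seq;
	pkts = (metric + mss - 1) / mss;	/* round up to whole packets */
	return pkts < max_reord ? pkts : max_reord;
}

int main(void)
{
	/* Highest SACK 30000 bytes past the hole, 1448-byte MSS, cap of 300. */
	printf("reordering = %u packets\n",
	       reordering_degree(100000, 130000, 1448, 300));
	return 0;
}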
  
  /* This must be called before lost_out is incremented */
@@@ -990,7 -955,6 +955,6 @@@ void tcp_skb_mark_lost_uncond_verify(st
   * 3. Loss detection event of two flavors:
   *    A. Scoreboard estimator decided the packet is lost.
   *       A'. Reno "three dupacks" marks head of queue lost.
-  *       A''. Its FACK modification, head until snd.fack is lost.
   *    B. SACK arrives sacking SND.NXT at the moment, when the
   *       segment was retransmitted.
   * 4. D-SACK added new rule: D-SACK changes any tag to S.
@@@ -1133,8 -1097,7 +1097,7 @@@ static bool tcp_check_dsack(struct soc
  }
  
  struct tcp_sacktag_state {
-       int     reord;
-       int     fack_count;
+       u32     reord;
        /* Timestamps for earliest and latest never-retransmitted segment
         * that was SACKed. RTO needs the earliest RTT to stay conservative,
         * but congestion control should still get an accurate delay signal.
        u64     last_sackt;
        struct rate_sample *rate;
        int     flag;
+       unsigned int mss_now;
  };
  
  /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@@ -1192,7 -1156,8 +1156,8 @@@ static int tcp_match_skb_to_sack(struc
                if (pkt_len >= skb->len && !in_sack)
                        return 0;
  
-               err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+               err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+                                  pkt_len, mss, GFP_ATOMIC);
                if (err < 0)
                        return err;
        }
@@@ -1208,15 -1173,15 +1173,15 @@@ static u8 tcp_sacktag_one(struct sock *
                          u64 xmit_time)
  {
        struct tcp_sock *tp = tcp_sk(sk);
-       int fack_count = state->fack_count;
  
        /* Account D-SACK for retransmitted packet. */
        if (dup_sack && (sacked & TCPCB_RETRANS)) {
                if (tp->undo_marker && tp->undo_retrans > 0 &&
                    after(end_seq, tp->undo_marker))
                        tp->undo_retrans--;
-               if (sacked & TCPCB_SACKED_ACKED)
-                       state->reord = min(fack_count, state->reord);
+               if ((sacked & TCPCB_SACKED_ACKED) &&
+                   before(start_seq, state->reord))
+                               state->reord = start_seq;
        }
  
        /* Nothing to do; acked frame is about to be dropped (was ACKed). */
                                 * which was in hole. It is reordering.
                                 */
                                if (before(start_seq,
-                                          tcp_highest_sack_seq(tp)))
-                                       state->reord = min(fack_count,
-                                                          state->reord);
+                                          tcp_highest_sack_seq(tp)) &&
+                                   before(start_seq, state->reord))
+                                       state->reord = start_seq;
                                if (!after(end_seq, tp->high_seq))
                                        state->flag |= FLAG_ORIG_SACK_ACKED;
                                if (state->first_sackt == 0)
                tp->sacked_out += pcount;
                tp->delivered += pcount;  /* Out-of-order packets delivered */
  
-               fack_count += pcount;
                /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
-               if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+               if (tp->lost_skb_hint &&
                    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
                        tp->lost_cnt_hint += pcount;
-               if (fack_count > tp->fackets_out)
-                       tp->fackets_out = fack_count;
        }
  
        /* D-SACK. We can detect redundant retransmission in S|R and plain R
  /* Shift newly-SACKed bytes from this skb to the immediately previous
   * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
   */
- static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+                           struct sk_buff *skb,
                            struct tcp_sacktag_state *state,
                            unsigned int pcount, int shifted, int mss,
                            bool dup_sack)
  {
        struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
        u32 start_seq = TCP_SKB_CB(skb)->seq;   /* start of newly-SACKed */
        u32 end_seq = start_seq + shifted;      /* end of newly-SACKed */
  
        if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
                TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
  
-       tcp_unlink_write_queue(skb, sk);
-       sk_wmem_free_skb(sk, skb);
+       tcp_rtx_queue_unlink_and_free(skb, sk);
  
        NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
  
@@@ -1415,9 -1375,9 +1375,9 @@@ static struct sk_buff *tcp_shift_skb_da
                goto fallback;
  
        /* Can only happen with delayed DSACK + discard craziness */
-       if (unlikely(skb == tcp_write_queue_head(sk)))
+       prev = skb_rb_prev(skb);
+       if (!prev)
                goto fallback;
-       prev = tcp_write_queue_prev(sk, skb);
  
        if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
                goto fallback;
  
        if (!skb_shift(prev, skb, len))
                goto fallback;
-       if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+       if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
                goto out;
  
        /* Hole filled allows collapsing with the next as well, this is very
         * useful when hole on every nth skb pattern happens
         */
-       if (prev == tcp_write_queue_tail(sk))
+       skb = skb_rb_next(prev);
+       if (!skb)
                goto out;
-       skb = tcp_write_queue_next(sk, prev);
  
        if (!skb_can_shift(skb) ||
-           (skb == tcp_send_head(sk)) ||
            ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
            (mss != tcp_skb_seglen(skb)))
                goto out;
        len = skb->len;
        if (skb_shift(prev, skb, len)) {
                pcount += tcp_skb_pcount(skb);
-               tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+               tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+                               len, mss, 0);
        }
  
  out:
-       state->fack_count += pcount;
        return prev;
  
  noop:
@@@ -1539,13 -1498,10 +1498,10 @@@ static struct sk_buff *tcp_sacktag_walk
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *tmp;
  
-       tcp_for_write_queue_from(skb, sk) {
+       skb_rbtree_walk_from(skb) {
                int in_sack = 0;
                bool dup_sack = dup_sack_in;
  
-               if (skb == tcp_send_head(sk))
-                       break;
                /* queue is in-order => we can short-circuit the walk early */
                if (!before(TCP_SKB_CB(skb)->seq, end_seq))
                        break;
                                                tcp_skb_pcount(skb),
                                                skb->skb_mstamp);
                        tcp_rate_skb_delivered(sk, skb, state->rate);
+                       if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+                               list_del_init(&skb->tcp_tsorted_anchor);
  
                        if (!before(TCP_SKB_CB(skb)->seq,
                                    tcp_highest_sack_seq(tp)))
                                tcp_advance_highest_sack(sk, skb);
                }
-               state->fack_count += tcp_skb_pcount(skb);
        }
        return skb;
  }
  
- /* Avoid all extra work that is being done by sacktag while walking in
-  * a normal way
-  */
+ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+                                          struct tcp_sacktag_state *state,
+                                          u32 seq)
+ {
+       struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+       struct sk_buff *skb;
+       while (*p) {
+               parent = *p;
+               skb = rb_to_skb(parent);
+               if (before(seq, TCP_SKB_CB(skb)->seq)) {
+                       p = &parent->rb_left;
+                       continue;
+               }
+               if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+                       p = &parent->rb_right;
+                       continue;
+               }
+               return skb;
+       }
+       return NULL;
+ }
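
The new tcp_sacktag_bsearch() keys the retransmit rbtree on 32-bit sequence numbers compared with before()/after(). Those helpers rely on the usual signed-difference trick so comparisons stay correct across wraparound; a minimal standalone sketch (seq_before() is just a stand-in name for the kernel macro):

#include <stdint.h>
#include <stdio.h>

static int seq_before(uint32_t a, uint32_t b)
{
	/* True if a precedes b in 32-bit sequence space, wraparound-safe. */
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	/* 0xfffffff0 precedes 0x10 once the sequence space wraps. */
	printf("%d\n", seq_before(0xfffffff0u, 0x10u));	/* prints 1 */
	return 0;
}
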
  static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
                                        struct tcp_sacktag_state *state,
                                        u32 skip_to_seq)
  {
-       tcp_for_write_queue_from(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
-               if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
-                       break;
+       if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+               return skb;
  
-               state->fack_count += tcp_skb_pcount(skb);
-       }
-       return skb;
+       return tcp_sacktag_bsearch(sk, state, skip_to_seq);
  }
  
  static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@@ -1666,13 -1636,10 +1636,10 @@@ tcp_sacktag_write_queue(struct sock *sk
        int first_sack_index;
  
        state->flag = 0;
-       state->reord = tp->packets_out;
+       state->reord = tp->snd_nxt;
  
-       if (!tp->sacked_out) {
-               if (WARN_ON(tp->fackets_out))
-                       tp->fackets_out = 0;
+       if (!tp->sacked_out)
                tcp_highest_sack_reset(sk);
-       }
  
        found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
                                         num_sacks, prior_snd_una);
                }
        }
  
-       skb = tcp_write_queue_head(sk);
-       state->fack_count = 0;
+       state->mss_now = tcp_current_mss(sk);
+       skb = NULL;
        i = 0;
  
        if (!tp->sacked_out) {
                                skb = tcp_highest_sack(sk);
                                if (!skb)
                                        break;
-                               state->fack_count = tp->fackets_out;
                                cache++;
                                goto walk;
                        }
                        skb = tcp_highest_sack(sk);
                        if (!skb)
                                break;
-                       state->fack_count = tp->fackets_out;
                }
                skb = tcp_sacktag_skip(skb, sk, state, start_seq);
  
@@@ -1836,9 -1801,8 +1801,8 @@@ advance_sp
        for (j = 0; j < used_sacks; j++)
                tp->recv_sack_cache[i++] = sp[j];
  
-       if ((state->reord < tp->fackets_out) &&
-           ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
-               tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+       if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
+               tcp_check_sack_reordering(sk, state->reord, 0);
  
        tcp_verify_left_out(tp);
  out:
@@@ -1876,8 -1840,13 +1840,13 @@@ static bool tcp_limit_reno_sacked(struc
  static void tcp_check_reno_reordering(struct sock *sk, const int addend)
  {
        struct tcp_sock *tp = tcp_sk(sk);
-       if (tcp_limit_reno_sacked(tp))
-               tcp_update_reordering(sk, tp->packets_out + addend, 0);
+       if (!tcp_limit_reno_sacked(tp))
+               return;
+       tp->reordering = min_t(u32, tp->packets_out + addend,
+                              sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
  }
  
  /* Emulate SACKs for SACKless connection: account for a new dupack. */
@@@ -1923,7 -1892,6 +1892,6 @@@ void tcp_clear_retrans(struct tcp_sock 
        tp->lost_out = 0;
        tp->undo_marker = 0;
        tp->undo_retrans = -1;
-       tp->fackets_out = 0;
        tp->sacked_out = 0;
  }
  
@@@ -1968,19 -1936,15 +1936,15 @@@ void tcp_enter_loss(struct sock *sk
        if (tcp_is_reno(tp))
                tcp_reset_reno_sack(tp);
  
-       skb = tcp_write_queue_head(sk);
+       skb = tcp_rtx_queue_head(sk);
        is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
        if (is_reneg) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
                tp->sacked_out = 0;
-               tp->fackets_out = 0;
        }
        tcp_clear_all_retrans_hints(tp);
  
-       tcp_for_write_queue(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
+       skb_rbtree_walk_from(skb) {
                mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
                             is_reneg);
                if (mark_lost)
         * falsely raise the receive window, which results in repeated
         * timeouts and stop-and-go behavior.
         */
-       tp->frto = sysctl_tcp_frto &&
+       tp->frto = net->ipv4.sysctl_tcp_frto &&
                   (new_recovery || icsk->icsk_retransmits) &&
                   !inet_csk(sk)->icsk_mtup.probe_size;
  }
@@@ -2043,19 -2007,10 +2007,10 @@@ static bool tcp_check_sack_reneging(str
        return false;
  }
  
- static inline int tcp_fackets_out(const struct tcp_sock *tp)
- {
-       return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
- }
  /* Heuristics to calculate the number of duplicate ACKs. There's no dupACK
   * counter when SACK is enabled (without SACK, sacked_out is used for
   * that purpose).
   *
-  * Instead, with FACK TCP uses fackets_out that includes both SACKed
-  * segments up to the highest received SACK block so far and holes in
-  * between them.
-  *
   * With reordering, holes may still be in flight, so RFC3517 recovery
   * uses pure sacked_out (total number of SACKed segments) even though
   * it violates the RFC that uses duplicate ACKs, often these are equal
   */
  static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
  {
-       return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+       return tp->sacked_out + 1;
  }
  
- /* Linux NewReno/SACK/FACK/ECN state machine.
+ /* Linux NewReno/SACK/ECN state machine.
   * --------------------------------------
   *
   * "Open"     Normal state, no dubious events, fast path.
   *            dynamically measured and adjusted. This is implemented in
   *            tcp_rack_mark_lost.
   *
-  *            FACK (Disabled by default. Subsumbed by RACK):
-  *            It is the simplest heuristics. As soon as we decided
-  *            that something is lost, we decide that _all_ not SACKed
-  *            packets until the most forward SACK are lost. I.e.
-  *            lost_out = fackets_out - sacked_out and left_out = fackets_out.
-  *            It is absolutely correct estimate, if network does not reorder
-  *            packets. And it loses any connection to reality when reordering
-  *            takes place. We use FACK by default until reordering
-  *            is suspected on the path to this destination.
-  *
   *            If the receiver does not support SACK:
   *
   *            NewReno (RFC6582): in Recovery we assume that one segment
@@@ -2191,7 -2136,7 +2136,7 @@@ static bool tcp_time_to_recover(struct 
  }
  
  /* Detect loss in event "A" above by marking head of queue up as lost.
-  * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+  * For non-SACK(Reno) senders, the first "packets" number of segments
   * are considered lost. For RFC3517 SACK, a segment is considered lost if it
   * has at least tp->reordering SACKed segments above it; "packets" refers to
   * the maximum SACKed segments to pass before reaching this limit.
@@@ -2206,20 -2151,18 +2151,18 @@@ static void tcp_mark_head_lost(struct s
        const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
  
        WARN_ON(packets > tp->packets_out);
-       if (tp->lost_skb_hint) {
-               skb = tp->lost_skb_hint;
-               cnt = tp->lost_cnt_hint;
+       skb = tp->lost_skb_hint;
+       if (skb) {
                /* Head already handled? */
-               if (mark_head && skb != tcp_write_queue_head(sk))
+               if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
                        return;
+               cnt = tp->lost_cnt_hint;
        } else {
-               skb = tcp_write_queue_head(sk);
+               skb = tcp_rtx_queue_head(sk);
                cnt = 0;
        }
  
-       tcp_for_write_queue_from(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
+       skb_rbtree_walk_from(skb) {
                /* TODO: do this better */
                /* this is not the most efficient way to do this... */
                tp->lost_skb_hint = skb;
                        break;
  
                oldcnt = cnt;
-               if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+               if (tcp_is_reno(tp) ||
                    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
                        cnt += tcp_skb_pcount(skb);
  
                if (cnt > packets) {
-                       if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+                       if (tcp_is_sack(tp) ||
                            (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
                            (oldcnt >= packets))
                                break;
                        /* If needed, chop off the prefix to mark as lost. */
                        lost = (packets - oldcnt) * mss;
                        if (lost < skb->len &&
-                           tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+                           tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+                                        lost, mss, GFP_ATOMIC) < 0)
                                break;
                        cnt = packets;
                }
@@@ -2264,11 -2208,6 +2208,6 @@@ static void tcp_update_scoreboard(struc
  
        if (tcp_is_reno(tp)) {
                tcp_mark_head_lost(sk, 1, 1);
-       } else if (tcp_is_fack(tp)) {
-               int lost = tp->fackets_out - tp->reordering;
-               if (lost <= 0)
-                       lost = 1;
-               tcp_mark_head_lost(sk, lost, 0);
        } else {
                int sacked_upto = tp->sacked_out - tp->reordering;
                if (sacked_upto >= 0)
@@@ -2327,16 -2266,16 +2266,16 @@@ static bool tcp_any_retrans_done(const 
        if (tp->retrans_out)
                return true;
  
-       skb = tcp_write_queue_head(sk);
+       skb = tcp_rtx_queue_head(sk);
        if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
                return true;
  
        return false;
  }
  
- #if FASTRETRANS_DEBUG > 1
  static void DBGUNDO(struct sock *sk, const char *msg)
  {
+ #if FASTRETRANS_DEBUG > 1
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_sock *inet = inet_sk(sk);
  
                         tp->packets_out);
        }
  #endif
- }
- #else
- #define DBGUNDO(x...) do { } while (0)
  #endif
+ }
  
  static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
  {
        if (unmark_loss) {
                struct sk_buff *skb;
  
-               tcp_for_write_queue(skb, sk) {
-                       if (skb == tcp_send_head(sk))
-                               break;
+               skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                }
                tp->lost_out = 0;
@@@ -2417,6 -2352,8 +2352,8 @@@ static bool tcp_try_undo_recovery(struc
                        mib_idx = LINUX_MIB_TCPFULLUNDO;
  
                NET_INC_STATS(sock_net(sk), mib_idx);
+       } else if (tp->rack.reo_wnd_persist) {
+               tp->rack.reo_wnd_persist--;
        }
        if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
                /* Hold old state until something *above* high_seq
@@@ -2436,6 -2373,8 +2373,8 @@@ static bool tcp_try_undo_dsack(struct s
        struct tcp_sock *tp = tcp_sk(sk);
  
        if (tp->undo_marker && !tp->undo_retrans) {
+               tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+                                              tp->rack.reo_wnd_persist + 1);
                DBGUNDO(sk, "D-SACK");
                tcp_undo_cwnd_reduction(sk, false);
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
@@@ -2616,9 -2555,7 +2555,7 @@@ void tcp_simple_retransmit(struct sock 
        struct sk_buff *skb;
        unsigned int mss = tcp_current_mss(sk);
  
-       tcp_for_write_queue(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
+       skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
                if (tcp_skb_seglen(skb) > mss &&
                    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
                        if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@@ -2712,7 -2649,7 +2649,7 @@@ static void tcp_process_loss(struct soc
                         * is updated in tcp_ack()). Otherwise fall back to
                         * the conventional recovery.
                         */
-                       if (tcp_send_head(sk) &&
+                       if (!tcp_write_queue_empty(sk) &&
                            after(tcp_wnd_end(tp), tp->snd_nxt)) {
                                *rexmit = REXMIT_NEW;
                                return;
  }
  
  /* Undo during fast recovery after partial ACK. */
- static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
  {
        struct tcp_sock *tp = tcp_sk(sk);
  
        if (tp->undo_marker && tcp_packet_delayed(tp)) {
                /* Plain luck! Hole is filled with delayed
-                * packet, rather than with a retransmit.
+                * packet, rather than with a retransmit. Check reordering.
                 */
-               tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+               tcp_check_sack_reordering(sk, prior_snd_una, 1);
  
                /* We are getting evidence that the reordering degree is higher
                 * than we realized. If there are no retransmits out then we
@@@ -2774,7 -2711,7 +2711,7 @@@ static void tcp_rack_identify_loss(stru
        struct tcp_sock *tp = tcp_sk(sk);
  
        /* Use RACK to detect loss */
-       if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+       if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
                u32 prior_retrans = tp->retrans_out;
  
                tcp_rack_mark_lost(sk);
        }
  }
  
+ static bool tcp_force_fast_retransmit(struct sock *sk)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+       return after(tcp_highest_sack_seq(tp),
+                    tp->snd_una + tp->reordering * tp->mss_cache);
+ }
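
tcp_force_fast_retransmit() above expresses the dupack threshold in bytes rather than in fackets_out: recovery is forced once the highest SACKed sequence runs more than reordering*mss past snd_una. A tiny sketch of that predicate; force_fast_retransmit() is a hypothetical helper and, for brevity, skips the wraparound-safe after() comparison.

#include <stdio.h>

static int force_fast_retransmit(unsigned int highest_sack_seq,
				 unsigned int snd_una,
				 unsigned int reordering, unsigned int mss)
{
	return highest_sack_seq > snd_una + reordering * mss;
}

int main(void)
{
	/* 3-packet reordering window, 1448-byte MSS, SACK 5000 bytes ahead. */
	printf("%d\n", force_fast_retransmit(105000, 100000, 3, 1448));
	return 0;
}
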
  /* Process an event, which can update packets-in-flight not trivially.
   * Main goal of this function is to calculate new estimate for left_out,
   * taking into account both packets sitting in receiver's buffer and
   * It does _not_ decide what to send, it is made in function
   * tcp_xmit_retransmit_queue().
   */
- static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
                                  bool is_dupack, int *ack_flag, int *rexmit)
  {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        int fast_rexmit = 0, flag = *ack_flag;
        bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
-                                   (tcp_fackets_out(tp) > tp->reordering));
+                                    tcp_force_fast_retransmit(sk));
  
-       if (WARN_ON(!tp->packets_out && tp->sacked_out))
+       if (!tp->packets_out && tp->sacked_out)
                tp->sacked_out = 0;
-       if (WARN_ON(!tp->sacked_out && tp->fackets_out))
-               tp->fackets_out = 0;
  
        /* Now state machine starts.
         * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
                        if (tcp_is_reno(tp) && is_dupack)
                                tcp_add_reno_sack(sk);
                } else {
-                       if (tcp_try_undo_partial(sk, acked))
+                       if (tcp_try_undo_partial(sk, prior_snd_una))
                                return;
                        /* Partial ACK arrived. Force fast retransmit. */
                        do_lost = tcp_is_reno(tp) ||
-                                 tcp_fackets_out(tp) > tp->reordering;
+                                 tcp_force_fast_retransmit(sk);
                }
                if (tcp_try_undo_dsack(sk)) {
                        tcp_try_keep_open(sk);
                      (*ack_flag & FLAG_LOST_RETRANS)))
                        return;
                /* Change state if cwnd is undone or retransmits are lost */
+               /* fall through */
        default:
                if (tcp_is_reno(tp)) {
                        if (flag & FLAG_SND_UNA_ADVANCED)
  
  static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
  {
+       u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
        struct tcp_sock *tp = tcp_sk(sk);
-       u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
  
        minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
                           rtt_us ? : jiffies_to_usecs(1));
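
The hunk above only moves tcp_min_rtt_wlen into struct netns_ipv4; the estimator itself remains a windowed running minimum of RTT samples (the kernel's win_minmax filter). A deliberately simplified single-sample sketch of the idea follows; the real filter keeps three samples so the estimate ages gracefully, and rtt_min_update() here is purely illustrative.

#include <stdint.h>

struct rtt_min {
	uint32_t value_us;	/* current windowed minimum (init to UINT32_MAX) */
	uint32_t stamp;		/* time, in the caller's units, it was recorded */
};

static void rtt_min_update(struct rtt_min *m, uint32_t now, uint32_t win,
			   uint32_t sample_us)
{
	/* Accept a new minimum, or restart the window once it has expired. */
	if (sample_us <= m->value_us || now - m->stamp > win) {
		m->value_us = sample_us;
		m->stamp = now;
	}
}
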
@@@ -3056,28 -3000,31 +3000,31 @@@ static void tcp_ack_tstamp(struct sock 
  
        shinfo = skb_shinfo(skb);
        if (!before(shinfo->tskey, prior_snd_una) &&
-           before(shinfo->tskey, tcp_sk(sk)->snd_una))
-               __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+           before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
+               tcp_skb_tsorted_save(skb) {
+                       __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+               } tcp_skb_tsorted_restore(skb);
+       }
  }
  
  /* Remove acknowledged frames from the retransmission queue. If our packet
   * is before the ack sequence we can discard it as it's confirmed to have
   * arrived at the other end.
   */
- static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-                              u32 prior_snd_una, int *acked,
+ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+                              u32 prior_snd_una,
                               struct tcp_sacktag_state *sack)
  {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u64 first_ackt, last_ackt;
        struct tcp_sock *tp = tcp_sk(sk);
        u32 prior_sacked = tp->sacked_out;
-       u32 reord = tp->packets_out;
+       u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
+       struct sk_buff *skb, *next;
        bool fully_acked = true;
        long sack_rtt_us = -1L;
        long seq_rtt_us = -1L;
        long ca_rtt_us = -1L;
-       struct sk_buff *skb;
        u32 pkts_acked = 0;
        u32 last_in_flight = 0;
        bool rtt_update;
  
        first_ackt = 0;
  
-       while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+       for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
                struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+               const u32 start_seq = scb->seq;
                u8 sacked = scb->sacked;
                u32 acked_pcount;
  
                                break;
                        fully_acked = false;
                } else {
-                       /* Speedup tcp_unlink_write_queue() and next loop */
-                       prefetchw(skb->next);
                        acked_pcount = tcp_skb_pcount(skb);
                }
  
                                first_ackt = last_ackt;
  
                        last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
-                       reord = min(pkts_acked, reord);
+                       if (before(start_seq, reord))
+                               reord = start_seq;
                        if (!after(scb->end_seq, tp->high_seq))
                                flag |= FLAG_ORIG_SACK_ACKED;
                }
                if (!fully_acked)
                        break;
  
-               tcp_unlink_write_queue(skb, sk);
-               sk_wmem_free_skb(sk, skb);
+               next = skb_rb_next(skb);
                if (unlikely(skb == tp->retransmit_skb_hint))
                        tp->retransmit_skb_hint = NULL;
                if (unlikely(skb == tp->lost_skb_hint))
                        tp->lost_skb_hint = NULL;
+               tcp_rtx_queue_unlink_and_free(skb, sk);
        }
  
        if (!skb)
                        int delta;
  
                        /* Non-retransmitted hole got filled? That's reordering */
-                       if (reord < prior_fackets && reord <= tp->fackets_out)
-                               tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+                       if (before(reord, prior_fack))
+                               tcp_check_sack_reordering(sk, reord, 0);
  
-                       delta = tcp_is_fack(tp) ? pkts_acked :
-                                                 prior_sacked - tp->sacked_out;
+                       delta = prior_sacked - tp->sacked_out;
                        tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
                }
-               tp->fackets_out -= min(pkts_acked, tp->fackets_out);
        } else if (skb && rtt_update && sack_rtt_us >= 0 &&
                   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
                /* Do not re-arm RTO if the sack RTT is measured from data sent
                }
        }
  #endif
-       *acked = pkts_acked;
        return flag;
  }
  
  static void tcp_ack_probe(struct sock *sk)
  {
-       const struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct sk_buff *head = tcp_send_head(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
  
        /* Was it a usable window open? */
-       if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+       if (!head)
+               return;
+       if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
                icsk->icsk_backoff = 0;
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
                /* Socket must be waked up by subsequent tcp_data_snd_check().
@@@ -3378,7 -3322,7 +3322,7 @@@ static int tcp_ack_update_window(struc
                        tp->pred_flags = 0;
                        tcp_fast_path_check(sk);
  
-                       if (tcp_send_head(sk))
+                       if (!tcp_write_queue_empty(sk))
                                tcp_slow_start_after_idle_check(sk);
  
                        if (nwin > tp->max_window) {
@@@ -3399,7 -3343,7 +3343,7 @@@ static bool __tcp_oow_rate_limited(stru
        if (*last_oow_ack_time) {
                s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
  
-               if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+               if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
                        NET_INC_STATS(net, mib_idx);
                        return true;    /* rate-limited: don't send yet! */
                }
@@@ -3435,10 -3379,11 +3379,11 @@@ static void tcp_send_challenge_ack(stru
        static u32 challenge_timestamp;
        static unsigned int challenge_count;
        struct tcp_sock *tp = tcp_sk(sk);
+       struct net *net = sock_net(sk);
        u32 count, now;
  
        /* First check our per-socket dupack rate limit. */
-       if (__tcp_oow_rate_limited(sock_net(sk),
+       if (__tcp_oow_rate_limited(net,
                                   LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
                                   &tp->last_oow_ack_time))
                return;
        /* Then check host-wide RFC 5961 rate limit. */
        now = jiffies / HZ;
        if (now != challenge_timestamp) {
-               u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+               u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+               u32 half = (ack_limit + 1) >> 1;
  
                challenge_timestamp = now;
-               WRITE_ONCE(challenge_count, half +
-                          prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+               WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
        }
        count = READ_ONCE(challenge_count);
        if (count > 0) {
                WRITE_ONCE(challenge_count, count - 1);
-               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+               NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
                tcp_send_ack(sk);
        }
  }
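
tcp_send_challenge_ack() above keeps its host-wide RFC 5961 limiter but now reads the limit from net->ipv4.sysctl_tcp_challenge_ack_limit. The scheme is a once-per-second budget refilled to a randomized value so the exact limit cannot be probed, with each challenge ACK spending one token. A standalone sketch; may_send_challenge_ack() and rand_below() are illustrative stand-ins for the kernel helpers, not their real names.

#include <stdint.h>
#include <stdlib.h>

static uint32_t challenge_stamp;	/* second of the last refill */
static uint32_t challenge_count;	/* tokens left this second */

static uint32_t rand_below(uint32_t n)
{
	return (uint32_t)rand() % n;	/* placeholder for prandom_u32_max() */
}

static int may_send_challenge_ack(uint32_t now_sec, uint32_t limit)
{
	if (now_sec != challenge_stamp) {
		challenge_stamp = now_sec;
		challenge_count = (limit + 1) / 2 + rand_below(limit);
	}
	if (!challenge_count)
		return 0;
	challenge_count--;
	return 1;
}
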
@@@ -3553,18 -3498,17 +3498,17 @@@ static int tcp_ack(struct sock *sk, con
        u32 ack_seq = TCP_SKB_CB(skb)->seq;
        u32 ack = TCP_SKB_CB(skb)->ack_seq;
        bool is_dupack = false;
-       u32 prior_fackets;
        int prior_packets = tp->packets_out;
        u32 delivered = tp->delivered;
        u32 lost = tp->lost;
-       int acked = 0; /* Number of packets newly acked */
        int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+       u32 prior_fack;
  
        sack_state.first_sackt = 0;
        sack_state.rate = &rs;
  
-       /* We very likely will need to access write queue head. */
-       prefetchw(sk->sk_write_queue.next);
+       /* We very likely will need to access rtx queue. */
+       prefetch(sk->tcp_rtx_queue.rb_node);
  
        /* If the ack is older than previous acks
         * then we can probably ignore it.
                icsk->icsk_retransmits = 0;
        }
  
-       prior_fackets = tp->fackets_out;
+       prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
        rs.prior_in_flight = tcp_packets_in_flight(tp);
  
        /* ts_recent update must be made after we are sure that the packet
                goto no_queue;
  
        /* See if we can take anything off of the retransmit queue. */
-       flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
-                                   &sack_state);
+       flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+       tcp_rack_update_reo_wnd(sk, &rs);
  
        if (tp->tlp_high_seq)
                tcp_process_tlp_ack(sk, ack, flag);
  
        if (tcp_ack_is_dubious(sk, flag)) {
                is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+                                     &rexmit);
        }
  
        if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
  no_queue:
        /* If data was DSACKed, see if we can undo a cwnd reduction. */
        if (flag & FLAG_DSACKING_ACK)
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+                                     &rexmit);
        /* If this ack opens up a zero window, clear backoff.  It was
         * being used to time the probes, and is probably far higher than
         * it needs to be for normal retransmission.
         */
-       if (tcp_send_head(sk))
-               tcp_ack_probe(sk);
+       tcp_ack_probe(sk);
  
        if (tp->tlp_high_seq)
                tcp_process_tlp_ack(sk, ack, flag);
@@@ -3696,7 -3642,8 +3642,8 @@@ old_ack
        if (TCP_SKB_CB(skb)->sacked) {
                flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
                                                &sack_state);
-               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+                                     &rexmit);
                tcp_xmit_recovery(sk, rexmit);
        }
  
@@@ -3721,6 -3668,21 +3668,21 @@@ static void tcp_parse_fastopen_option(i
        foc->exp = exp_opt;
  }
  
+ static void smc_parse_options(const struct tcphdr *th,
+                             struct tcp_options_received *opt_rx,
+                             const unsigned char *ptr,
+                             int opsize)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (th->syn && !(opsize & 1) &&
+                   opsize >= TCPOLEN_EXP_SMC_BASE &&
+                   get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+                       opt_rx->smc_ok = 1;
+       }
+ #endif
+ }
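
smc_parse_options() above is new: on a SYN it recognizes SMC by a 32-bit magic at the start of a shared experimental TCP option, requiring an even option length of at least TCPOLEN_EXP_SMC_BASE. A generic sketch of that style of check; exp_option_matches() and load_be32() are hypothetical helpers, the caller supplies the real magic, and min_len is assumed to be at least 4.

#include <stdint.h>

static uint32_t load_be32(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | p[3];
}

static int exp_option_matches(const unsigned char *payload, int opsize,
			      int min_len, uint32_t magic)
{
	/* Even length, long enough, and the payload starts with the magic. */
	return !(opsize & 1) && opsize >= min_len && load_be32(payload) == magic;
}
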
  /* Look for tcp options. Normally only called on SYN and SYNACK packets.
   * But, this can also be called on packets in the established flow when
   * the fast version below fails.
@@@ -3828,6 -3790,9 +3790,9 @@@ void tcp_parse_options(const struct ne
                                        tcp_parse_fastopen_option(opsize -
                                                TCPOLEN_EXP_FASTOPEN_BASE,
                                                ptr + 2, th->syn, foc, true);
+                               else
+                                       smc_parse_options(th, opt_rx, ptr,
+                                                         opsize);
                                break;
  
                        }
@@@ -3995,6 -3960,8 +3960,8 @@@ static inline bool tcp_sequence(const s
  /* When we get a reset we do this. */
  void tcp_reset(struct sock *sk)
  {
+       trace_tcp_receive_reset(sk);
        /* We want the right error as BSD sees it (and indeed as we do). */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
@@@ -4117,7 -4084,7 +4084,7 @@@ static void tcp_dsack_set(struct sock *
  {
        struct tcp_sock *tp = tcp_sk(sk);
  
-       if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+       if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
                int mib_idx;
  
                if (before(seq, tp->rcv_nxt))
@@@ -4152,7 -4119,7 +4119,7 @@@ static void tcp_send_dupack(struct soc
                NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
                tcp_enter_quickack_mode(sk);
  
-               if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+               if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
                        u32 end_seq = TCP_SKB_CB(skb)->end_seq;
  
                        if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@@ -4268,11 -4235,6 +4235,6 @@@ static void tcp_sack_remove(struct tcp_
        tp->rx_opt.num_sacks = num_sacks;
  }
  
- enum tcp_queue {
-       OOO_QUEUE,
-       RCV_QUEUE,
- };
  /**
   * tcp_try_coalesce - try to merge skb to prior one
   * @sk: socket
   * Returns true if caller should free @from instead of queueing it
   */
  static bool tcp_try_coalesce(struct sock *sk,
-                            enum tcp_queue dest,
                             struct sk_buff *to,
                             struct sk_buff *from,
                             bool *fragstolen)
  
        if (TCP_SKB_CB(from)->has_rxtstamp) {
                TCP_SKB_CB(to)->has_rxtstamp = true;
-               if (dest == OOO_QUEUE)
-                       TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
-               else
-                       to->tstamp = from->tstamp;
+               to->tstamp = from->tstamp;
        }
  
        return true;
@@@ -4341,7 -4299,7 +4299,7 @@@ static void tcp_ofo_queue(struct sock *
  
        p = rb_first(&tp->out_of_order_queue);
        while (p) {
-               skb = rb_entry(p, struct sk_buff, rbnode);
+               skb = rb_to_skb(p);
                if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
                        break;
  
                }
                p = rb_next(p);
                rb_erase(&skb->rbnode, &tp->out_of_order_queue);
-               /* Replace tstamp which was stomped by rbnode */
-               if (TCP_SKB_CB(skb)->has_rxtstamp)
-                       skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
  
                if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
                        SOCK_DEBUG(sk, "ofo packet was already received\n");
                           TCP_SKB_CB(skb)->end_seq);
  
                tail = skb_peek_tail(&sk->sk_receive_queue);
-               eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
-                                                tail, skb, &fragstolen);
+               eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
                tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
                if (!eaten)
@@@ -4409,7 -4363,7 +4363,7 @@@ static int tcp_try_rmem_schedule(struc
  static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
  {
        struct tcp_sock *tp = tcp_sk(sk);
-       struct rb_node **p, *q, *parent;
+       struct rb_node **p, *parent;
        struct sk_buff *skb1;
        u32 seq, end_seq;
        bool fragstolen;
                return;
        }
  
-       /* Stash tstamp to avoid being stomped on by rbnode */
-       if (TCP_SKB_CB(skb)->has_rxtstamp)
-               TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
        /* Disable header prediction. */
        tp->pred_flags = 0;
        inet_csk_schedule_ack(sk);
        /* In the typical case, we are adding an skb to the end of the list.
         * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
         */
-       if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+       if (tcp_try_coalesce(sk, tp->ooo_last_skb,
                             skb, &fragstolen)) {
  coalesce_done:
                tcp_grow_window(sk, skb);
        parent = NULL;
        while (*p) {
                parent = *p;
-               skb1 = rb_entry(parent, struct sk_buff, rbnode);
+               skb1 = rb_to_skb(parent);
                if (before(seq, TCP_SKB_CB(skb1)->seq)) {
                        p = &parent->rb_left;
                        continue;
                                __kfree_skb(skb1);
                                goto merge_right;
                        }
-               } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+               } else if (tcp_try_coalesce(sk, skb1,
                                            skb, &fragstolen)) {
                        goto coalesce_done;
                }
@@@ -4517,9 -4467,7 +4467,7 @@@ insert
  
  merge_right:
        /* Remove other segments covered by skb. */
-       while ((q = rb_next(&skb->rbnode)) != NULL) {
-               skb1 = rb_entry(q, struct sk_buff, rbnode);
+       while ((skb1 = skb_rb_next(skb)) != NULL) {
                if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
                        break;
                if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
                tcp_drop(sk, skb1);
        }
        /* If there is no skb after us, we are the last_skb ! */
-       if (!q)
+       if (!skb1)
                tp->ooo_last_skb = skb;
  
  add_sack:
@@@ -4556,7 -4504,7 +4504,7 @@@ static int __must_check tcp_queue_rcv(s
  
        __skb_pull(skb, hdrlen);
        eaten = (tail &&
-                tcp_try_coalesce(sk, RCV_QUEUE, tail,
+                tcp_try_coalesce(sk, tail,
                                  skb, fragstolen)) ? 1 : 0;
        tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
        if (!eaten) {
@@@ -4720,7 -4668,7 +4668,7 @@@ static struct sk_buff *tcp_skb_next(str
        if (list)
                return !skb_queue_is_last(list, skb) ? skb->next : NULL;
  
-       return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+       return skb_rb_next(skb);
  }
  
  static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
  }
  
  /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
- static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+ void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
  {
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
  
        while (*p) {
                parent = *p;
-               skb1 = rb_entry(parent, struct sk_buff, rbnode);
+               skb1 = rb_to_skb(parent);
                if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
                        p = &parent->rb_left;
                else
@@@ -4796,7 -4744,7 +4744,7 @@@ restart
                 *   overlaps to the next one.
                 */
                if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
-                   (tcp_win_from_space(skb->truesize) > skb->len ||
+                   (tcp_win_from_space(sk, skb->truesize) > skb->len ||
                     before(TCP_SKB_CB(skb)->seq, start))) {
                        end_of_skbs = false;
                        break;
@@@ -4868,26 -4816,19 +4816,19 @@@ static void tcp_collapse_ofo_queue(stru
  {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb, *head;
-       struct rb_node *p;
        u32 start, end;
  
-       p = rb_first(&tp->out_of_order_queue);
-       skb = rb_entry_safe(p, struct sk_buff, rbnode);
+       skb = skb_rb_first(&tp->out_of_order_queue);
  new_range:
        if (!skb) {
-               p = rb_last(&tp->out_of_order_queue);
-               /* Note: This is possible p is NULL here. We do not
-                * use rb_entry_safe(), as ooo_last_skb is valid only
-                * if rbtree is not empty.
-                */
-               tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+               tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
                return;
        }
        start = TCP_SKB_CB(skb)->seq;
        end = TCP_SKB_CB(skb)->end_seq;
  
        for (head = skb;;) {
-               skb = tcp_skb_next(skb, NULL);
+               skb = skb_rb_next(skb);
  
                /* Range is terminated when we see a gap or when
                 * we are at the queue end.
@@@ -4930,14 -4871,14 +4871,14 @@@ static bool tcp_prune_ofo_queue(struct 
        do {
                prev = rb_prev(node);
                rb_erase(node, &tp->out_of_order_queue);
-               tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+               tcp_drop(sk, rb_to_skb(node));
                sk_mem_reclaim(sk);
                if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
                    !tcp_under_memory_pressure(sk))
                        break;
                node = prev;
        } while (node);
-       tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+       tp->ooo_last_skb = rb_to_skb(prev);
  
        /* Reset SACK state.  A conforming SACK implementation will
         * do the same at a timeout based retransmit.  When a connection
@@@ -5112,7 -5053,7 +5053,7 @@@ static void tcp_check_urg(struct sock *
        struct tcp_sock *tp = tcp_sk(sk);
        u32 ptr = ntohs(th->urg_ptr);
  
-       if (ptr && !sysctl_tcp_stdurg)
+       if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
                ptr--;
        ptr += ntohl(th->seq);
  
@@@ -5532,20 -5473,13 +5473,13 @@@ void tcp_finish_connect(struct sock *sk
                security_inet_conn_established(sk, skb);
        }
  
-       /* Make sure socket is routed, for correct metrics.  */
-       icsk->icsk_af_ops->rebuild_header(sk);
-       tcp_init_metrics(sk);
-       tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
-       tcp_init_congestion_control(sk);
+       tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
  
        /* Prevent spurious tcp_cwnd_restart() on first data
         * packet.
         */
        tp->lsndtime = tcp_jiffies32;
  
-       tcp_init_buffer_space(sk);
        if (sock_flag(sk, SOCK_KEEPOPEN))
                inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
  
@@@ -5559,7 -5493,7 +5493,7 @@@ static bool tcp_rcv_fastopen_synack(str
                                    struct tcp_fastopen_cookie *cookie)
  {
        struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+       struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
        u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
        bool syn_drop = false;
  
        tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
  
        if (data) { /* Retransmit unacked data in SYN */
-               tcp_for_write_queue_from(data, sk) {
-                       if (data == tcp_send_head(sk) ||
-                           __tcp_retransmit_skb(sk, data, 1))
+               skb_rbtree_walk_from(data) {
+                       if (__tcp_retransmit_skb(sk, data, 1))
                                break;
                }
                tcp_rearm_rto(sk);
        return false;
  }
  
+ static void smc_check_reset_syn(struct tcp_sock *tp)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (tp->syn_smc && !tp->rx_opt.smc_ok)
+                       tp->syn_smc = 0;
+       }
+ #endif
+ }
  static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                                         const struct tcphdr *th)
  {
                        tp->tcp_header_len = sizeof(struct tcphdr);
                }
  
-               if (tcp_is_sack(tp) && sysctl_tcp_fack)
-                       tcp_enable_fack(tp);
-               tcp_mtup_init(sk);
                tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                tcp_initialize_rcv_mss(sk);
  
                 * is initialized. */
                tp->copied_seq = tp->rcv_nxt;
  
+               smc_check_reset_syn(tp);
                smp_mb();
  
                tcp_finish_connect(sk, skb);
@@@ -5938,15 -5879,18 +5879,18 @@@ int tcp_rcv_state_process(struct sock *
                if (req) {
                        inet_csk(sk)->icsk_retransmits = 0;
                        reqsk_fastopen_remove(sk, req, false);
+                       /* Re-arm the timer because data may have been sent out.
+                        * This is similar to the regular data transmission case
+                        * when new data has just been ack'ed.
+                        *
+                        * (TFO) - we could try to be more aggressive and
+                        * retransmitting any data sooner based on when they
+                        * are sent out.
+                        */
+                       tcp_rearm_rto(sk);
                } else {
-                       /* Make sure socket is routed, for correct metrics. */
-                       icsk->icsk_af_ops->rebuild_header(sk);
-                       tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
-                       tcp_init_congestion_control(sk);
-                       tcp_mtup_init(sk);
+                       tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
                        tp->copied_seq = tp->rcv_nxt;
-                       tcp_init_buffer_space(sk);
                }
                smp_mb();
                tcp_set_state(sk, TCP_ESTABLISHED);
                if (tp->rx_opt.tstamp_ok)
                        tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
  
-               if (req) {
-                       /* Re-arm the timer because data may have been sent out.
-                        * This is similar to the regular data transmission case
-                        * when new data has just been ack'ed.
-                        *
-                        * (TFO) - we could try to be more aggressive and
-                        * retransmitting any data sooner based on when they
-                        * are sent out.
-                        */
-                       tcp_rearm_rto(sk);
-               } else
-                       tcp_init_metrics(sk);
                if (!inet_csk(sk)->icsk_ca_ops->cong_control)
                        tcp_update_pacing_rate(sk);
  
        case TCP_LAST_ACK:
                if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
                        break;
+               /* fall through */
        case TCP_FIN_WAIT1:
        case TCP_FIN_WAIT2:
                /* RFC 793 says to queue data in these states,
@@@ -6183,6 -6115,9 +6115,9 @@@ static void tcp_openreq_init(struct req
        ireq->ir_rmt_port = tcp_hdr(skb)->source;
        ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
        ireq->ir_mark = inet_request_mark(sk, skb);
+ #if IS_ENABLED(CONFIG_SMC)
+       ireq->smc_ok = rx_opt->smc_ok;
+ #endif
  }
  
  struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
@@@ -6358,7 -6293,7 +6293,7 @@@ int tcp_conn_request(struct request_soc
        tcp_openreq_init_rwin(req, sk, dst);
        if (!want_cookie) {
                tcp_reqsk_record_syn(sk, req, skb);
-               fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
+               fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
        }
        if (fastopen_sk) {
                af_ops->send_synack(fastopen_sk, dst, &fl, req,
diff --combined net/ipv4/tcp_output.c
index 5a42e873d44a8f880d8999e911d64d6f388865fb,76dbe884f2469660028684a46fc19afa000a1353..540b7d92cc70b3ea4f91ecb307840166f7f4dbce
  #include <linux/compiler.h>
  #include <linux/gfp.h>
  #include <linux/module.h>
+ #include <linux/static_key.h>
  
- /* People can turn this off for buggy TCP's found in printers etc. */
- int sysctl_tcp_retrans_collapse __read_mostly = 1;
- /* People can turn this on to work with those rare, broken TCPs that
-  * interpret the window field as a signed quantity.
-  */
- int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
- /* Default TSQ limit of four TSO segments */
- int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
- /* This limits the percentage of the congestion window which we
-  * will allow a single TSO frame to consume.  Building TSO frames
-  * which are too large can cause TCP streams to be bursty.
-  */
- int sysctl_tcp_tso_win_divisor __read_mostly = 3;
- /* By default, RFC2861 behavior.  */
- int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+ #include <trace/events/tcp.h>
  
  static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                           int push_one, gfp_t gfp);
  
  /* Account for new data that has been sent to the network. */
- static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
  {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        unsigned int prior_packets = tp->packets_out;
  
-       tcp_advance_send_head(sk, skb);
        tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
  
+       __skb_unlink(skb, &sk->sk_write_queue);
+       tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
        tp->packets_out += tcp_skb_pcount(skb);
        if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
                tcp_rearm_rto(sk);
@@@ -203,7 -188,7 +188,7 @@@ u32 tcp_default_init_rwnd(u32 mss
   * be a multiple of mss if possible. We assume here that mss >= 1.
   * This MUST be enforced by all callers.
   */
- void tcp_select_initial_window(int __space, __u32 mss,
+ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
                               __u32 *rcv_wnd, __u32 *window_clamp,
                               int wscale_ok, __u8 *rcv_wscale,
                               __u32 init_rcv_wnd)
         * which we interpret as a sign the remote TCP is not
         * misinterpreting the window field as a signed quantity.
         */
-       if (sysctl_tcp_workaround_signed_windows)
+       if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
                (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
        else
                (*rcv_wnd) = space;
        (*rcv_wscale) = 0;
        if (wscale_ok) {
                /* Set window scaling on max possible window */
-               space = max_t(u32, space, sysctl_tcp_rmem[2]);
+               space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
                space = max_t(u32, space, sysctl_rmem_max);
                space = min_t(u32, space, *window_clamp);
                while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
@@@ -287,7 -272,8 +272,8 @@@ static u16 tcp_select_window(struct soc
        /* Make sure we do not exceed the maximum possible
         * scaled window.
         */
-       if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
+       if (!tp->rx_opt.rcv_wscale &&
+           sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
                new_win = min(new_win, MAX_TCP_WINDOW);
        else
                new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
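Both window hunks above fetch the signed-window workaround through sock_net(sk)->ipv4 instead of a file-scope int: every network namespace now carries its own copy of the knob (and of tcp_rmem) in struct netns_ipv4, so containers can tune them independently. The lookup amounts to a pattern like the following sketch, where tcp_signed_window_workaround() is a hypothetical wrapper rather than an in-tree helper:

    static inline bool tcp_signed_window_workaround(const struct sock *sk)
    {
            /* sock_net() maps the socket to its namespace; the sysctl
             * value lives in net->ipv4 rather than in a global.
             */
            return sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows;
    }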
@@@ -395,7 -381,6 +381,6 @@@ static void tcp_ecn_send(struct sock *s
  static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
  {
        skb->ip_summed = CHECKSUM_PARTIAL;
-       skb->csum = 0;
  
        TCP_SKB_CB(skb)->tcp_flags = flags;
        TCP_SKB_CB(skb)->sacked = 0;
@@@ -418,6 -403,22 +403,22 @@@ static inline bool tcp_urg_mode(const s
  #define OPTION_MD5            (1 << 2)
  #define OPTION_WSCALE         (1 << 3)
  #define OPTION_FAST_OPEN_COOKIE       (1 << 8)
+ #define OPTION_SMC            (1 << 9)
+ static void smc_options_write(__be32 *ptr, u16 *options)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (unlikely(OPTION_SMC & *options)) {
+                       *ptr++ = htonl((TCPOPT_NOP  << 24) |
+                                      (TCPOPT_NOP  << 16) |
+                                      (TCPOPT_EXP <<  8) |
+                                      (TCPOLEN_EXP_SMC_BASE));
+                       *ptr++ = htonl(TCPOPT_SMC_MAGIC);
+               }
+       }
+ #endif
+ }
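smc_options_write() advertises SMC capability as a TCP experimental option, padded with two leading NOPs so the option block stays 32-bit aligned. Assuming the usual kind/length framing, the eight bytes written above lay out as:

    /*  byte 0  byte 1  byte 2            byte 3                     bytes 4..7
     *  NOP     NOP     kind=TCPOPT_EXP   len=TCPOLEN_EXP_SMC_BASE   TCPOPT_SMC_MAGIC
     *                  (experimental)    (kind + len + magic)       (32-bit magic)
     */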
  
  struct tcp_out_options {
        u16 options;            /* bit field of OPTION_* */
@@@ -536,6 -537,41 +537,41 @@@ static void tcp_options_write(__be32 *p
                }
                ptr += (len + 3) >> 2;
        }
+       smc_options_write(ptr, &options);
+ }
+ static void smc_set_option(const struct tcp_sock *tp,
+                          struct tcp_out_options *opts,
+                          unsigned int *remaining)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (tp->syn_smc) {
+                       if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+                               opts->options |= OPTION_SMC;
+                               *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+                       }
+               }
+       }
+ #endif
+ }
+ static void smc_set_option_cond(const struct tcp_sock *tp,
+                               const struct inet_request_sock *ireq,
+                               struct tcp_out_options *opts,
+                               unsigned int *remaining)
+ {
+ #if IS_ENABLED(CONFIG_SMC)
+       if (static_branch_unlikely(&tcp_have_smc)) {
+               if (tp->syn_smc && ireq->smc_ok) {
+                       if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+                               opts->options |= OPTION_SMC;
+                               *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+                       }
+               }
+       }
+ #endif
  }
  
  /* Compute TCP options for SYN packets. This is not the final
@@@ -603,11 -639,14 +639,14 @@@ static unsigned int tcp_syn_options(str
                }
        }
  
+       smc_set_option(tp, opts, &remaining);
        return MAX_TCP_OPTION_SPACE - remaining;
  }
  
  /* Set up TCP options for SYN-ACKs. */
- static unsigned int tcp_synack_options(struct request_sock *req,
+ static unsigned int tcp_synack_options(const struct sock *sk,
+                                      struct request_sock *req,
                                       unsigned int mss, struct sk_buff *skb,
                                       struct tcp_out_options *opts,
                                       const struct tcp_md5sig_key *md5,
                }
        }
  
+       smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
        return MAX_TCP_OPTION_SPACE - remaining;
  }
  
@@@ -973,6 -1014,12 +1014,12 @@@ static void tcp_internal_pacing(struct 
                      HRTIMER_MODE_ABS_PINNED);
  }
  
+ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+ {
+       skb->skb_mstamp = tp->tcp_mstamp;
+       list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+ }
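tcp_update_skb_after_send() stamps the skb with the send time and moves it to the tail of tp->tsorted_sent_queue, so that list stays ordered by most recent (re)transmission. Loss detection can then scan it oldest-first and stop at the first entry that is still too fresh to judge lost; a rough consumer-side sketch, where recently_sent() stands in for whatever cutoff the caller applies:

    struct sk_buff *skb, *tmp;

    list_for_each_entry_safe(skb, tmp, &tp->tsorted_sent_queue,
                             tcp_tsorted_anchor) {
            if (recently_sent(tp, skb))     /* hypothetical cutoff check */
                    break;                  /* later entries are newer still */
            /* ... mark skb lost / queue it for retransmission ... */
    }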
  /* This routine actually transmits TCP packets queued in by
   * tcp_do_sendmsg().  This is used by both the initial
   * transmission and possible later retransmissions.
@@@ -1005,10 -1052,14 +1052,14 @@@ static int tcp_transmit_skb(struct soc
                TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
                        - tp->snd_una;
                oskb = skb;
-               if (unlikely(skb_cloned(skb)))
-                       skb = pskb_copy(skb, gfp_mask);
-               else
-                       skb = skb_clone(skb, gfp_mask);
+               tcp_skb_tsorted_save(oskb) {
+                       if (unlikely(skb_cloned(oskb)))
+                               skb = pskb_copy(oskb, gfp_mask);
+                       else
+                               skb = skb_clone(oskb, gfp_mask);
+               } tcp_skb_tsorted_restore(oskb);
                if (unlikely(!skb))
                        return -ENOBUFS;
        }
                err = net_xmit_eval(err);
        }
        if (!err && oskb) {
-               oskb->skb_mstamp = tp->tcp_mstamp;
+               tcp_update_skb_after_send(tp, oskb);
                tcp_rate_skb_sent(sk, oskb);
        }
        return err;
@@@ -1167,21 -1218,6 +1218,6 @@@ static void tcp_set_skb_tso_segs(struc
        }
  }
  
- /* When a modification to fackets out becomes necessary, we need to check
-  * skb is counted to fackets_out or not.
-  */
- static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
-                                  int decr)
- {
-       struct tcp_sock *tp = tcp_sk(sk);
-       if (!tp->sacked_out || tcp_is_reno(tp))
-               return;
-       if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
-               tp->fackets_out -= decr;
- }
  /* Pcount in the middle of the write queue got changed, we need to do various
   * tweaks to fix counters
   */
@@@ -1202,11 -1238,9 +1238,9 @@@ static void tcp_adjust_pcount(struct so
        if (tcp_is_reno(tp) && decr > 0)
                tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
  
-       tcp_adjust_fackets_out(sk, skb, decr);
        if (tp->lost_skb_hint &&
            before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
-           (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
+           (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
                tp->lost_cnt_hint -= decr;
  
        tcp_verify_left_out(tp);
@@@ -1241,12 -1275,25 +1275,25 @@@ static void tcp_skb_fragment_eor(struc
        TCP_SKB_CB(skb)->eor = 0;
  }
  
+ /* Insert buff after skb on the write or rtx queue of sk.  */
+ static void tcp_insert_write_queue_after(struct sk_buff *skb,
+                                        struct sk_buff *buff,
+                                        struct sock *sk,
+                                        enum tcp_queue tcp_queue)
+ {
+       if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
+               __skb_queue_after(&sk->sk_write_queue, skb, buff);
+       else
+               tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+ }
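tcp_rbtree_insert() itself is not part of this hunk; it keys the retransmit-queue rbtree on the skb's starting sequence number, keeping lookups and in-order walks over sent-but-unacked data O(log n) even for very large windows. It plausibly reduces to the standard rbtree insertion idiom:

    void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
    {
            struct rb_node **p = &root->rb_node;
            struct rb_node *parent = NULL;
            struct sk_buff *skb1;

            while (*p) {
                    parent = *p;
                    skb1 = rb_entry(parent, struct sk_buff, rbnode);
                    if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
                            p = &parent->rb_left;
                    else
                            p = &parent->rb_right;
            }
            rb_link_node(&skb->rbnode, parent, p);
            rb_insert_color(&skb->rbnode, root);
    }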
  /* Function to create two new TCP segments.  Shrinks the given segment
   * to the specified size and appends a new segment with the rest of the
   * packet to the list.  This won't be called frequently, I hope.
   * Remember, these are still headerless SKBs at this point.
   */
- int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+                struct sk_buff *skb, u32 len,
                 unsigned int mss_now, gfp_t gfp)
  {
        struct tcp_sock *tp = tcp_sk(sk);
  
        /* Link BUFF into the send queue. */
        __skb_header_release(buff);
-       tcp_insert_write_queue_after(skb, buff, sk);
+       tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+       if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
+               list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
  
        return 0;
  }
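tcp_fragment() and its callers now say which queue the skb sits on, so the second half of the split is linked back into the right container: the plain sk_write_queue list for data not yet sent, or the rtx rbtree (plus the tsorted list, as above) when a sent segment is being carved up. The enum presumably amounts to the two values used throughout this file:

    enum tcp_queue {
            TCP_FRAG_IN_WRITE_QUEUE,
            TCP_FRAG_IN_RTX_QUEUE,
    };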
@@@ -1607,7 -1656,7 +1656,7 @@@ static void tcp_cwnd_validate(struct so
                if (tp->packets_out > tp->snd_cwnd_used)
                        tp->snd_cwnd_used = tp->packets_out;
  
-               if (sysctl_tcp_slow_start_after_idle &&
+               if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
                    (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
                    !ca_ops->cong_control)
                        tcp_cwnd_application_limited(sk);
                 * is caused by insufficient sender buffer:
                 * 1) just sent some data (see tcp_write_xmit)
                 * 2) not cwnd limited (this else condition)
-                * 3) no more data to send (null tcp_send_head )
+                * 3) no more data to send (tcp_write_queue_empty())
                 * 4) application is hitting buffer limit (SOCK_NOSPACE)
                 */
-               if (!tcp_send_head(sk) && sk->sk_socket &&
+               if (tcp_write_queue_empty(sk) && sk->sk_socket &&
                    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
                    (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
                        tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@@ -1671,7 -1720,7 +1720,7 @@@ u32 tcp_tso_autosize(const struct sock 
  {
        u32 bytes, segs;
  
-       bytes = min(sk->sk_pacing_rate >> 10,
+       bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
                    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
  
        /* Goal is to send at least one packet per ms,
@@@ -1694,7 -1743,8 +1743,8 @@@ static u32 tcp_tso_segs(struct sock *sk
        u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
  
        return tso_segs ? :
-               tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+               tcp_tso_autosize(sk, mss_now,
+                                sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
  }
  
  /* Returns the portion of skb which can be sent right away */
@@@ -1815,7 -1865,8 +1865,8 @@@ static bool tcp_snd_wnd_test(const stru
   * know that all the data is in scatter-gather pages, and that the
   * packet has never been sent out before (and thus is not cloned).
   */
- static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+                       struct sk_buff *skb, unsigned int len,
                        unsigned int mss_now, gfp_t gfp)
  {
        struct sk_buff *buff;
  
        /* All of a TSO frame must be composed of paged data.  */
        if (skb->len != skb->data_len)
-               return tcp_fragment(sk, skb, len, mss_now, gfp);
+               return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
  
        buff = sk_stream_alloc_skb(sk, 0, gfp, true);
        if (unlikely(!buff))
  
        /* Link BUFF into the send queue. */
        __skb_header_release(buff);
-       tcp_insert_write_queue_after(skb, buff, sk);
+       tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
  
        return 0;
  }
@@@ -1910,7 -1961,7 +1961,7 @@@ static bool tcp_tso_should_defer(struc
        if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
                goto send_now;
  
-       win_divisor = READ_ONCE(sysctl_tcp_tso_win_divisor);
 -      win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
++      win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
        if (win_divisor) {
                u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
  
                        goto send_now;
        }
  
-       head = tcp_write_queue_head(sk);
+       /* TODO : use tsorted_sent_queue ? */
+       head = tcp_rtx_queue_head(sk);
+       if (!head)
+               goto send_now;
        age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
        /* If next ACK is likely to come too late (half srtt), do not defer */
        if (age < (tp->srtt_us >> 4))
@@@ -2145,18 -2198,18 +2198,18 @@@ static bool tcp_small_queue_check(struc
  {
        unsigned int limit;
  
-       limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
-       limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+       limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
+       limit = min_t(u32, limit,
+                     sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
        limit <<= factor;
  
        if (refcount_read(&sk->sk_wmem_alloc) > limit) {
-               /* Always send the 1st or 2nd skb in write queue.
+               /* Always send skb if rtx queue is empty.
                 * No need to wait for TX completion to call us back,
                 * after softirq/tasklet schedule.
                 * This helps when TX completions are delayed too much.
                 */
-               if (skb == sk->sk_write_queue.next ||
-                   skb->prev == sk->sk_write_queue.next)
+               if (tcp_rtx_queue_empty(sk))
                        return false;
  
                set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
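For scale: the limit above is max(2 * skb->truesize, sk_pacing_rate >> sk_pacing_shift), clamped to net.ipv4.tcp_limit_output_bytes and then shifted left by the caller's factor. With the default sk_pacing_shift of 10, the pacing term is roughly one millisecond of payload at the current rate, e.g. about 122 KB at 1 Gbit/s (125 MB/s >> 10), comfortably below the 262144-byte sysctl default, so the pacing term normally governs.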
@@@ -2207,7 -2260,7 +2260,7 @@@ void tcp_chrono_stop(struct sock *sk, c
         * it's the "most interesting" or current chrono we are
         * tracking and starts busy chrono if we have pending data.
         */
-       if (tcp_write_queue_empty(sk))
+       if (tcp_rtx_and_write_queues_empty(sk))
                tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
        else if (type == tp->chrono_type)
                tcp_chrono_set(tp, TCP_CHRONO_BUSY);
@@@ -2263,7 -2316,7 +2316,7 @@@ static bool tcp_write_xmit(struct sock 
  
                if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
                        /* "skb_mstamp" is used as a start point for the retransmit timer */
-                       skb->skb_mstamp = tp->tcp_mstamp;
+                       tcp_update_skb_after_send(tp, skb);
                        goto repair; /* Skip network transmission */
                }
  
                                                    nonagle);
  
                if (skb->len > limit &&
-                   unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+                   unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+                                         skb, limit, mss_now, gfp)))
                        break;
  
                if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@@ -2342,7 -2396,7 +2396,7 @@@ repair
                tcp_cwnd_validate(sk, is_cwnd_limited);
                return false;
        }
-       return !tp->packets_out && tcp_send_head(sk);
+       return !tp->packets_out && !tcp_write_queue_empty(sk);
  }
  
  bool tcp_schedule_loss_probe(struct sock *sk)
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        u32 timeout, rto_delta_us;
+       int early_retrans;
  
        /* Don't do any loss probe on a Fast Open connection before 3WHS
         * finishes.
        if (tp->fastopen_rsk)
                return false;
  
+       early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
        /* Schedule a loss probe in 2*RTT for SACK capable connections
         * in Open state, that are either limited by cwnd or application.
         */
-       if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+       if ((early_retrans != 3 && early_retrans != 4) ||
            !tp->packets_out || !tcp_is_sack(tp) ||
            icsk->icsk_ca_state != TCP_CA_Open)
                return false;
  
        if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
-            tcp_send_head(sk))
+            !tcp_write_queue_empty(sk))
                return false;
  
        /* Probe timeout is 2*rtt. Add minimum RTO to account
@@@ -2419,18 -2475,14 +2475,14 @@@ void tcp_send_loss_probe(struct sock *s
        int mss = tcp_current_mss(sk);
  
        skb = tcp_send_head(sk);
-       if (skb) {
-               if (tcp_snd_wnd_test(tp, skb, mss)) {
-                       pcount = tp->packets_out;
-                       tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
-                       if (tp->packets_out > pcount)
-                               goto probe_sent;
-                       goto rearm_timer;
-               }
-               skb = tcp_write_queue_prev(sk, skb);
-       } else {
-               skb = tcp_write_queue_tail(sk);
+       if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+               pcount = tp->packets_out;
+               tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+               if (tp->packets_out > pcount)
+                       goto probe_sent;
+               goto rearm_timer;
        }
+       skb = skb_rb_last(&sk->tcp_rtx_queue);
  
        /* At most one outstanding TLP retransmission. */
        if (tp->tlp_high_seq)
                goto rearm_timer;
  
        if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
-               if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+               if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+                                         (pcount - 1) * mss, mss,
                                          GFP_ATOMIC)))
                        goto rearm_timer;
-               skb = tcp_write_queue_next(sk, skb);
+               skb = skb_rb_next(skb);
        }
  
        if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@@ -2651,7 -2704,7 +2704,7 @@@ void tcp_skb_collapse_tstamp(struct sk_
  static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
  {
        struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+       struct sk_buff *next_skb = skb_rb_next(skb);
        int skb_size, next_skb_size;
  
        skb_size = skb->len;
        }
        tcp_highest_sack_replace(sk, next_skb, skb);
  
-       tcp_unlink_write_queue(next_skb, sk);
        if (next_skb->ip_summed == CHECKSUM_PARTIAL)
                skb->ip_summed = CHECKSUM_PARTIAL;
  
  
        tcp_skb_collapse_tstamp(skb, next_skb);
  
-       sk_wmem_free_skb(sk, next_skb);
+       tcp_rtx_queue_unlink_and_free(next_skb, sk);
        return true;
  }
  
@@@ -2708,8 -2759,6 +2759,6 @@@ static bool tcp_can_collapse(const stru
                return false;
        if (skb_cloned(skb))
                return false;
-       if (skb == tcp_send_head(sk))
-               return false;
        /* Some heuristics for collapsing over SACK'd could be invented */
        if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
                return false;
@@@ -2727,12 -2776,12 +2776,12 @@@ static void tcp_retrans_try_collapse(st
        struct sk_buff *skb = to, *tmp;
        bool first = true;
  
-       if (!sysctl_tcp_retrans_collapse)
+       if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
                return;
        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
                return;
  
-       tcp_for_write_queue_from_safe(skb, tmp, sk) {
+       skb_rbtree_walk_from_safe(skb, tmp) {
                if (!tcp_can_collapse(sk, skb))
                        break;
  
@@@ -2807,7 -2856,8 +2856,8 @@@ int __tcp_retransmit_skb(struct sock *s
  
        len = cur_mss * segs;
        if (skb->len > len) {
-               if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+               if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+                                cur_mss, GFP_ATOMIC))
                        return -ENOMEM; /* We'll try again later. */
        } else {
                if (skb_unclone(skb, GFP_ATOMIC))
                     skb_headroom(skb) >= 0xFFFF)) {
                struct sk_buff *nskb;
  
-               nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
-               err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-                            -ENOBUFS;
+               tcp_skb_tsorted_save(skb) {
+                       nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+                       err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+                                    -ENOBUFS;
+               } tcp_skb_tsorted_restore(skb);
                if (!err) {
-                       skb->skb_mstamp = tp->tcp_mstamp;
+                       tcp_update_skb_after_send(tp, skb);
                        tcp_rate_skb_sent(sk, skb);
                }
        } else {
  
        if (likely(!err)) {
                TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+               trace_tcp_retransmit_skb(sk, skb);
        } else if (err != -EBUSY) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
        }
@@@ -2890,36 -2944,25 +2944,25 @@@ int tcp_retransmit_skb(struct sock *sk
   * retransmitted data is acknowledged.  It tries to continue
   * resending the rest of the retransmit queue, until either
   * we've sent it all or the congestion window limit is reached.
-  * If doing SACK, the first ACK which comes back for a timeout
-  * based retransmit packet might feed us FACK information again.
-  * If so, we use it to avoid unnecessarily retransmissions.
   */
  void tcp_xmit_retransmit_queue(struct sock *sk)
  {
        const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct sk_buff *skb, *rtx_head, *hole = NULL;
        struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *skb;
-       struct sk_buff *hole = NULL;
        u32 max_segs;
        int mib_idx;
  
        if (!tp->packets_out)
                return;
  
-       if (tp->retransmit_skb_hint) {
-               skb = tp->retransmit_skb_hint;
-       } else {
-               skb = tcp_write_queue_head(sk);
-       }
+       rtx_head = tcp_rtx_queue_head(sk);
+       skb = tp->retransmit_skb_hint ?: rtx_head;
        max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
-       tcp_for_write_queue_from(skb, sk) {
+       skb_rbtree_walk_from(skb) {
                __u8 sacked;
                int segs;
  
-               if (skb == tcp_send_head(sk))
-                       break;
                if (tcp_pacing_check(sk))
                        break;
  
                if (tcp_in_cwnd_reduction(sk))
                        tp->prr_out += tcp_skb_pcount(skb);
  
-               if (skb == tcp_write_queue_head(sk) &&
+               if (skb == rtx_head &&
                    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                                  inet_csk(sk)->icsk_rto,
@@@ -3006,12 -3049,15 +3049,15 @@@ void tcp_send_fin(struct sock *sk
         * Note: in the latter case, FIN packet will be sent after a timeout,
         * as TCP stack thinks it has already been transmitted.
         */
-       if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+       if (!tskb && tcp_under_memory_pressure(sk))
+               tskb = skb_rb_last(&sk->tcp_rtx_queue);
+       if (tskb) {
  coalesce:
                TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
                TCP_SKB_CB(tskb)->end_seq++;
                tp->write_seq++;
-               if (!tcp_send_head(sk)) {
+               if (tcp_write_queue_empty(sk)) {
                        /* This means tskb was already sent.
                         * Pretend we included the FIN on previous transmit.
                         * We need to set tp->snd_nxt to the value it would have
                                goto coalesce;
                        return;
                }
+               INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
                skb_reserve(skb, MAX_TCP_HEADER);
                sk_forced_mem_schedule(sk, skb->truesize);
                /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
@@@ -3064,6 -3111,11 +3111,11 @@@ void tcp_send_active_reset(struct sock 
        /* Send it off. */
        if (tcp_transmit_skb(sk, skb, 0, priority))
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
+       /* skb of trace_tcp_send_reset() keeps the skb that caused RST,
+        * skb here is different to the troublesome skb, so use NULL
+        */
+       trace_tcp_send_reset(sk, NULL);
  }
  
  /* Send a crossed SYN-ACK during socket establishment.
@@@ -3076,20 -3128,24 +3128,24 @@@ int tcp_send_synack(struct sock *sk
  {
        struct sk_buff *skb;
  
-       skb = tcp_write_queue_head(sk);
+       skb = tcp_rtx_queue_head(sk);
        if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
-               pr_debug("%s: wrong queue state\n", __func__);
+               pr_err("%s: wrong queue state\n", __func__);
                return -EFAULT;
        }
        if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
                if (skb_cloned(skb)) {
-                       struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+                       struct sk_buff *nskb;
+                       tcp_skb_tsorted_save(skb) {
+                               nskb = skb_copy(skb, GFP_ATOMIC);
+                       } tcp_skb_tsorted_restore(skb);
                        if (!nskb)
                                return -ENOMEM;
-                       tcp_unlink_write_queue(skb, sk);
+                       INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
+                       tcp_rtx_queue_unlink_and_free(skb, sk);
                        __skb_header_release(nskb);
-                       __tcp_add_write_queue_head(sk, nskb);
-                       sk_wmem_free_skb(sk, skb);
+                       tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
                        sk->sk_wmem_queued += nskb->truesize;
                        sk_mem_charge(sk, nskb->truesize);
                        skb = nskb;
@@@ -3166,8 -3222,8 +3222,8 @@@ struct sk_buff *tcp_make_synack(const s
        md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
  #endif
        skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
-       tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
-                         sizeof(*th);
+       tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+                                            foc) + sizeof(*th);
  
        skb_push(skb, tcp_header_size);
        skb_reset_transport_header(skb);
@@@ -3268,7 -3324,7 +3324,7 @@@ static void tcp_connect_init(struct soc
        if (rcv_wnd == 0)
                rcv_wnd = dst_metric(dst, RTAX_INITRWND);
  
-       tcp_select_initial_window(tcp_full_space(sk),
+       tcp_select_initial_window(sk, tcp_full_space(sk),
                                  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
                                  &tp->rcv_wnd,
                                  &tp->window_clamp,
@@@ -3307,7 -3363,6 +3363,6 @@@ static void tcp_connect_queue_skb(struc
  
        tcb->end_seq += skb->len;
        __skb_header_release(skb);
-       __tcp_add_write_queue_tail(sk, skb);
        sk->sk_wmem_queued += skb->truesize;
        sk_mem_charge(sk, skb->truesize);
        tp->write_seq = tcb->end_seq;
@@@ -3355,6 -3410,7 +3410,7 @@@ static int tcp_send_syn_data(struct soc
                int copied = copy_from_iter(skb_put(syn_data, space), space,
                                            &fo->data->msg_iter);
                if (unlikely(!copied)) {
+                       tcp_skb_tsorted_anchor_cleanup(syn_data);
                        kfree_skb(syn_data);
                        goto fallback;
                }
        TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
        if (!err) {
                tp->syn_data = (fo->copied > 0);
+               tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
                goto done;
        }
  
-       /* data was not sent, this is our new send_head */
-       sk->sk_send_head = syn_data;
+       /* data was not sent, put it in write_queue */
+       __skb_queue_tail(&sk->sk_write_queue, syn_data);
        tp->packets_out -= tcp_skb_pcount(syn_data);
  
  fallback:
@@@ -3433,6 -3490,7 +3490,7 @@@ int tcp_connect(struct sock *sk
        tp->retrans_stamp = tcp_time_stamp(tp);
        tcp_connect_queue_skb(sk, buff);
        tcp_ecn_send_syn(sk, buff);
+       tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
  
        /* Send off SYN; include data in Fast Open. */
        err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@@ -3627,7 -3685,8 +3685,8 @@@ int tcp_write_wakeup(struct sock *sk, i
                    skb->len > mss) {
                        seg_size = min(seg_size, mss);
                        TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-                       if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+                       if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+                                        skb, seg_size, mss, GFP_ATOMIC))
                                return -1;
                } else if (!tcp_skb_pcount(skb))
                        tcp_set_skb_tso_segs(skb, mss);
@@@ -3657,7 -3716,7 +3716,7 @@@ void tcp_send_probe0(struct sock *sk
  
        err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
  
-       if (tp->packets_out || !tcp_send_head(sk)) {
+       if (tp->packets_out || tcp_write_queue_empty(sk)) {
                /* Cancel probe timer, if it is not required. */
                icsk->icsk_probes_out = 0;
                icsk->icsk_backoff = 0;
@@@ -3698,6 -3757,7 +3757,7 @@@ int tcp_rtx_synack(const struct sock *s
                __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
                if (unlikely(tcp_passive_fastopen(sk)))
                        tcp_sk(sk)->total_retrans++;
+               trace_tcp_retransmit_synack(sk, req);
        }
        return res;
  }
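A reading aid for the queue helpers used throughout this file: the socket now keeps two containers, sk->sk_write_queue (a plain list of not-yet-sent skbs) and sk->tcp_rtx_queue (an rbtree of sent-but-unacked skbs). The accessors seen above plausibly reduce to sketches like these:

    static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
    {
            struct rb_node *node = rb_first(&sk->tcp_rtx_queue);

            return node ? rb_entry(node, struct sk_buff, rbnode) : NULL;
    }

    static inline bool tcp_rtx_queue_empty(const struct sock *sk)
    {
            return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
    }

    static inline bool tcp_write_queue_empty(const struct sock *sk)
    {
            return skb_queue_empty(&sk->sk_write_queue);
    }

    static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
    {
            return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
    }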
diff --combined net/ipv4/udp.c
index 02ec9a3493033cf044b31724c340ce0cfa9add20,a6699af0553968e41d69b4201459e696b050badc..e4ff25c947c5e5b21ac4986d0327339f4f60d321
@@@ -1209,8 -1209,7 +1209,7 @@@ static void udp_rmem_release(struct soc
        if (likely(partial)) {
                up->forward_deficit += size;
                size = up->forward_deficit;
-               if (size < (sk->sk_rcvbuf >> 2) &&
-                   !skb_queue_empty(&up->reader_queue))
+               if (size < (sk->sk_rcvbuf >> 2))
                        return;
        } else {
                size += up->forward_deficit;
@@@ -1853,7 -1852,7 +1852,7 @@@ static int udp_queue_rcv_skb(struct soc
                 */
  
                /* if we're overly short, let UDP handle it */
 -              encap_rcv = ACCESS_ONCE(up->encap_rcv);
 +              encap_rcv = READ_ONCE(up->encap_rcv);
                if (encap_rcv) {
                        int ret;
  
@@@ -2298,7 -2297,7 +2297,7 @@@ void udp_destroy_sock(struct sock *sk
        unlock_sock_fast(sk, slow);
        if (static_key_false(&udp_encap_needed) && up->encap_type) {
                void (*encap_destroy)(struct sock *sk);
 -              encap_destroy = ACCESS_ONCE(up->encap_destroy);
 +              encap_destroy = READ_ONCE(up->encap_destroy);
                if (encap_destroy)
                        encap_destroy(sk);
        }
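The encap_rcv/encap_destroy hunks are part of the tree-wide move from ACCESS_ONCE() to READ_ONCE(): the latter is the documented single-load primitive and, unlike the bare volatile cast, also handles non-scalar types. The point in either spelling is that the function pointer is loaded exactly once before the NULL test and the indirect call, so a concurrent writer cannot make the two uses disagree:

    encap_rcv = READ_ONCE(up->encap_rcv);   /* one load, reused below */
    if (encap_rcv)
            ret = encap_rcv(sk, skb);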
diff --combined net/ipv6/ah6.c
index 3bd9d806b506f603fcf546d054b6316b6c3ac35c,37bb33fbc742542dd9b99b7189188cfa7bf12048..78c974391567683ef08799d5c99dc217ff2789f9
@@@ -271,6 -271,7 +271,7 @@@ static int ipv6_clear_mutable_options(s
                case NEXTHDR_DEST:
                        if (dir == XFRM_POLICY_OUT)
                                ipv6_rearrange_destopt(iph, exthdr.opth);
+                       /* fall through */
                case NEXTHDR_HOP:
                        if (!zero_out_mutable_opts(exthdr.opth)) {
                                net_dbg_ratelimited("overrun %sopts\n",
@@@ -443,7 -444,7 +444,7 @@@ static int ah6_output(struct xfrm_stat
                if (err == -EINPROGRESS)
                        goto out;
  
 -              if (err == -EBUSY)
 +              if (err == -ENOSPC)
                        err = NET_XMIT_DROP;
                goto out_free;
        }
diff --combined net/ipv6/esp6.c
index c04d995df37c36f5ca2432de7392c66c52360879,4000b71bfdc5757c554e8964722417cad1a729e1..a902ff8f59be3ed7e1f28afc234a0e56eca4e684
@@@ -396,7 -396,7 +396,7 @@@ int esp6_output_tail(struct xfrm_state 
        case -EINPROGRESS:
                goto error;
  
 -      case -EBUSY:
 +      case -ENOSPC:
                err = NET_XMIT_DROP;
                break;
  
@@@ -483,8 -483,8 +483,8 @@@ static inline int esp_remove_trailer(st
                goto out;
        }
  
-       if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
-               BUG();
+       ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
+       BUG_ON(ret);
  
        ret = -EINVAL;
        padlen = nexthdr[0];
@@@ -559,14 -559,14 +559,14 @@@ static void esp_input_restore_header(st
  static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
  {
        struct xfrm_state *x = xfrm_input_state(skb);
-       struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
  
        /* For ESN we move the header forward by 4 bytes to
         * accomodate the high bits.  We will move it back after
         * decryption.
         */
        if ((x->props.flags & XFRM_STATE_ESN)) {
-               esph = skb_push(skb, 4);
+               struct ip_esp_hdr *esph = skb_push(skb, 4);
                *seqhi = esph->spi;
                esph->spi = esph->seq_no;
                esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --combined net/ipv6/ip6_tunnel.c
index dab94655415741873e869176d2f06520085d8729,00882fdb12239fe719e1f9669512a9ce3edc0d90..3d3092adf1d2d5962b5fc87bdf08419762d1b1ee
@@@ -471,15 -471,16 +471,16 @@@ static in
  ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
            u8 *type, u8 *code, int *msg, __u32 *info, int offset)
  {
-       const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
-       struct ip6_tnl *t;
-       int rel_msg = 0;
+       const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+       struct net *net = dev_net(skb->dev);
        u8 rel_type = ICMPV6_DEST_UNREACH;
        u8 rel_code = ICMPV6_ADDR_UNREACH;
-       u8 tproto;
        __u32 rel_info = 0;
-       __u16 len;
+       struct ip6_tnl *t;
        int err = -ENOENT;
+       int rel_msg = 0;
+       u8 tproto;
+       __u16 len;
  
        /* If the packet doesn't contain the original IPv6 header we are
           in trouble since we might need the source address for further
        if (!t)
                goto out;
  
 -      tproto = ACCESS_ONCE(t->parms.proto);
 +      tproto = READ_ONCE(t->parms.proto);
        if (tproto != ipproto && tproto != 0)
                goto out;
  
        err = 0;
  
        switch (*type) {
-               __u32 teli;
                struct ipv6_tlv_tnl_enc_lim *tel;
-               __u32 mtu;
+               __u32 mtu, teli;
        case ICMPV6_DEST_UNREACH:
                net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
                                    t->parms.name);
                }
                break;
        case ICMPV6_PKT_TOOBIG:
+               ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
+                               sock_net_uid(net, NULL));
                mtu = *info - offset;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
-               t->dev->mtu = mtu;
                len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
                if (len > mtu) {
                        rel_type = ICMPV6_PKT_TOOBIG;
                        rel_msg = 1;
                }
                break;
+       case NDISC_REDIRECT:
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
+               break;
        }
  
        *type = rel_type;
@@@ -559,13 -563,12 +563,12 @@@ static in
  ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
           u8 type, u8 code, int offset, __be32 info)
  {
-       int rel_msg = 0;
-       u8 rel_type = type;
-       u8 rel_code = code;
        __u32 rel_info = ntohl(info);
-       int err;
-       struct sk_buff *skb2;
        const struct iphdr *eiph;
+       struct sk_buff *skb2;
+       int err, rel_msg = 0;
+       u8 rel_type = type;
+       u8 rel_code = code;
        struct rtable *rt;
        struct flowi4 fl4;
  
                rel_type = ICMP_DEST_UNREACH;
                rel_code = ICMP_FRAG_NEEDED;
                break;
-       case NDISC_REDIRECT:
-               rel_type = ICMP_REDIRECT;
-               rel_code = ICMP_REDIR_HOST;
        default:
                return 0;
        }
        eiph = ip_hdr(skb2);
  
        /* Try to guess incoming interface */
-       rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
-                                  eiph->saddr, 0,
-                                  0, 0,
-                                  IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+       rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
+                                  0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
        if (IS_ERR(rt))
                goto out;
  
        skb2->dev = rt->dst.dev;
+       ip_rt_put(rt);
  
        /* route "incoming" packet */
        if (rt->rt_flags & RTCF_LOCAL) {
-               ip_rt_put(rt);
-               rt = NULL;
                rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
-                                          eiph->daddr, eiph->saddr,
-                                          0, 0,
-                                          IPPROTO_IPIP,
-                                          RT_TOS(eiph->tos), 0);
-               if (IS_ERR(rt) ||
-                   rt->dst.dev->type != ARPHRD_TUNNEL) {
+                                          eiph->daddr, eiph->saddr, 0, 0,
+                                          IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+               if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
                        if (!IS_ERR(rt))
                                ip_rt_put(rt);
                        goto out;
                }
                skb_dst_set(skb2, &rt->dst);
        } else {
-               ip_rt_put(rt);
                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
                                   skb2->dev) ||
                    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
                if (rel_info > dst_mtu(skb_dst(skb2)))
                        goto out;
  
-               skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
+               skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
+                                               rel_info);
        }
-       if (rel_type == ICMP_REDIRECT)
-               skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
  
        icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
  
@@@ -665,11 -657,10 +657,10 @@@ static in
  ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
           u8 type, u8 code, int offset, __be32 info)
  {
-       int rel_msg = 0;
+       __u32 rel_info = ntohl(info);
+       int err, rel_msg = 0;
        u8 rel_type = type;
        u8 rel_code = code;
-       __u32 rel_info = ntohl(info);
-       int err;
  
        err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
                          &rel_msg, &rel_info, offset);
@@@ -769,7 -760,8 +760,8 @@@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t
  
                if ((ipv6_addr_is_multicast(laddr) ||
                     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
-                   likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
+                   ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
+                    likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
                        ret = 1;
        }
        return ret;
@@@ -899,7 -891,7 +891,7 @@@ static int ipxip6_rcv(struct sk_buff *s
        t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
  
        if (t) {
 -              u8 tproto = ACCESS_ONCE(t->parms.proto);
 +              u8 tproto = READ_ONCE(t->parms.proto);
  
                if (tproto != ipproto && tproto != 0)
                        goto drop;
@@@ -999,7 -991,8 +991,8 @@@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t
                if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
                        pr_warn("%s xmit: Local address not yet configured!\n",
                                p->name);
-               else if (!ipv6_addr_is_multicast(raddr) &&
+               else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+                        !ipv6_addr_is_multicast(raddr) &&
                         unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
                        pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
                                p->name);
@@@ -1233,7 -1226,7 +1226,7 @@@ ip4ip6_tnl_xmit(struct sk_buff *skb, st
  
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
  
 -      tproto = ACCESS_ONCE(t->parms.proto);
 +      tproto = READ_ONCE(t->parms.proto);
        if (tproto != IPPROTO_IPIP && tproto != 0)
                return -1;
  
@@@ -1303,7 -1296,7 +1296,7 @@@ ip6ip6_tnl_xmit(struct sk_buff *skb, st
        u8 tproto;
        int err;
  
 -      tproto = ACCESS_ONCE(t->parms.proto);
 +      tproto = READ_ONCE(t->parms.proto);
        if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
            ip6_tnl_addr_conflict(t, ipv6h))
                return -1;
@@@ -2168,17 -2161,16 +2161,16 @@@ static struct xfrm6_tunnel ip6ip6_handl
        .priority       =       1,
  };
  
- static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
+ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
  {
        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
        struct net_device *dev, *aux;
        int h;
        struct ip6_tnl *t;
-       LIST_HEAD(list);
  
        for_each_netdev_safe(net, dev, aux)
                if (dev->rtnl_link_ops == &ip6_link_ops)
-                       unregister_netdevice_queue(dev, &list);
+                       unregister_netdevice_queue(dev, list);
  
        for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
                t = rtnl_dereference(ip6n->tnls_r_l[h]);
                         * been added to the list by the previous loop.
                         */
                        if (!net_eq(dev_net(t->dev), net))
-                               unregister_netdevice_queue(t->dev, &list);
+                               unregister_netdevice_queue(t->dev, list);
                        t = rtnl_dereference(t->next);
                }
        }
-       unregister_netdevice_many(&list);
  }
  
  static int __net_init ip6_tnl_init_net(struct net *net)
@@@ -2236,16 -2226,21 +2226,21 @@@ err_alloc_dev
        return err;
  }
  
- static void __net_exit ip6_tnl_exit_net(struct net *net)
+ static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
  {
+       struct net *net;
+       LIST_HEAD(list);
        rtnl_lock();
-       ip6_tnl_destroy_tunnels(net);
+       list_for_each_entry(net, net_list, exit_list)
+               ip6_tnl_destroy_tunnels(net, &list);
+       unregister_netdevice_many(&list);
        rtnl_unlock();
  }
  
  static struct pernet_operations ip6_tnl_net_ops = {
        .init = ip6_tnl_init_net,
-       .exit = ip6_tnl_exit_net,
+       .exit_batch = ip6_tnl_exit_batch_net,
        .id   = &ip6_tnl_net_id,
        .size = sizeof(struct ip6_tnl_net),
  };
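The pernet conversion at the end of this file is about teardown cost: with .exit_batch, ip6_tnl_destroy_tunnels() only queues devices onto a shared list, and a single unregister_netdevice_many() call under one rtnl_lock covers every namespace in the dying batch, instead of paying the device-unregister synchronization once per namespace.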
diff --combined net/mac80211/sta_info.c
index 214d2ba02877d2fcb45980786528a4e650c9644d,9673e157bf8fd5be8f85277dfd41c0b46d634e17..a3060e55122c666eb3eedb6c8c93714e0783cab8
@@@ -329,10 -329,12 +329,12 @@@ struct sta_info *sta_info_alloc(struct 
                sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
                if (!sta->mesh)
                        goto free;
+               sta->mesh->plink_sta = sta;
                spin_lock_init(&sta->mesh->plink_lock);
                if (ieee80211_vif_is_mesh(&sdata->vif) &&
                    !sdata->u.mesh.user_mpm)
-                       init_timer(&sta->mesh->plink_timer);
+                       timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
+                                   0);
                sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
        }
  #endif
@@@ -515,6 -517,31 +517,31 @@@ static int sta_info_insert_drv_state(st
        return err;
  }
  
+ static void
+ ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
+ {
+       struct ieee80211_local *local = sdata->local;
+       bool allow_p2p_go_ps = sdata->vif.p2p;
+       struct sta_info *sta;
+       rcu_read_lock();
+       list_for_each_entry_rcu(sta, &local->sta_list, list) {
+               if (sdata != sta->sdata ||
+                   !test_sta_flag(sta, WLAN_STA_ASSOC))
+                       continue;
+               if (!sta->sta.support_p2p_ps) {
+                       allow_p2p_go_ps = false;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
+               sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
+               ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
+       }
+ }
  /*
   * should be called with sta_mtx locked
   * this function replaces the mutex lock
@@@ -561,6 -588,13 +588,13 @@@ static int sta_info_insert_finish(struc
                goto out_remove;
  
        set_sta_flag(sta, WLAN_STA_INSERTED);
+       if (sta->sta_state >= IEEE80211_STA_ASSOC) {
+               ieee80211_recalc_min_chandef(sta->sdata);
+               if (!sta->sta.support_p2p_ps)
+                       ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+       }
        /* accept BA sessions now */
        clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
  
@@@ -1788,31 -1822,6 +1822,6 @@@ void ieee80211_sta_set_buffered(struct 
  }
  EXPORT_SYMBOL(ieee80211_sta_set_buffered);
  
- static void
- ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
- {
-       struct ieee80211_local *local = sdata->local;
-       bool allow_p2p_go_ps = sdata->vif.p2p;
-       struct sta_info *sta;
-       rcu_read_lock();
-       list_for_each_entry_rcu(sta, &local->sta_list, list) {
-               if (sdata != sta->sdata ||
-                   !test_sta_flag(sta, WLAN_STA_ASSOC))
-                       continue;
-               if (!sta->sta.support_p2p_ps) {
-                       allow_p2p_go_ps = false;
-                       break;
-               }
-       }
-       rcu_read_unlock();
-       if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
-               sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
-               ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
-       }
- }
  int sta_info_move_state(struct sta_info *sta,
                        enum ieee80211_sta_state new_state)
  {
@@@ -2008,7 -2017,7 +2017,7 @@@ static void sta_stats_decode_rate(struc
  
  static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
  {
 -      u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
 +      u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
  
        if (rate == STA_STATS_RATE_INVALID)
                return -EINVAL;
diff --combined net/netfilter/ipvs/ip_vs_conn.c
index 3a43b3470331bccc4b83a72e593ae3a3ac0c9f18,f73561ca982d01750a88b8b82dc4c732de835d92..3e053cb300709cfb09b93364b33da69c5f9dff2a
@@@ -104,7 -104,7 +104,7 @@@ static inline void ct_write_unlock_bh(u
        spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
  }
  
 -static void ip_vs_conn_expire(unsigned long data);
 +static void ip_vs_conn_expire(struct timer_list *t);
  
  /*
   *    Returns hash value for IPVS connection entry
@@@ -185,7 -185,7 +185,7 @@@ static inline int ip_vs_conn_hash(struc
                hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
                ret = 1;
        } else {
-               pr_err("%s(): request for already hashed, called from %pF\n",
+               pr_err("%s(): request for already hashed, called from %pS\n",
                       __func__, __builtin_return_address(0));
                ret = 0;
        }
@@@ -457,7 -457,7 +457,7 @@@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_pr
  static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
  {
        __ip_vs_conn_put(cp);
 -      ip_vs_conn_expire((unsigned long)cp);
 +      ip_vs_conn_expire(&cp->timer);
  }
  
  /*
@@@ -817,9 -817,9 +817,9 @@@ static void ip_vs_conn_rcu_free(struct 
        kmem_cache_free(ip_vs_conn_cachep, cp);
  }
  
 -static void ip_vs_conn_expire(unsigned long data)
 +static void ip_vs_conn_expire(struct timer_list *t)
  {
 -      struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
 +      struct ip_vs_conn *cp = from_timer(cp, t, timer);
        struct netns_ipvs *ipvs = cp->ipvs;
  
        /*
@@@ -909,7 -909,7 +909,7 @@@ ip_vs_conn_new(const struct ip_vs_conn_
        }
  
        INIT_HLIST_NODE(&cp->c_list);
 -      setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
 +      timer_setup(&cp->timer, ip_vs_conn_expire, 0);
        cp->ipvs           = ipvs;
        cp->af             = p->af;
        cp->daf            = dest_af;
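The ip_vs_conn_expire() conversion above (and ip_vs_dest_trash_expire() below) follows the new timer API: the callback now receives the struct timer_list pointer and recovers its container with from_timer(), a container_of() wrapper, while timer_setup() drops setup_timer()'s unsigned-long data cookie. A self-contained sketch with hypothetical names:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    struct conn {                                   /* hypothetical container */
            struct timer_list timer;
            int state;
    };

    static void conn_expire(struct timer_list *t)
    {
            struct conn *cp = from_timer(cp, t, timer);     /* container_of() */

            cp->state = 0;                          /* ... expiry work ... */
    }

    static void conn_init(struct conn *cp)
    {
            timer_setup(&cp->timer, conn_expire, 0);
            mod_timer(&cp->timer, jiffies + HZ);    /* fire in one second */
    }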
diff --combined net/netfilter/ipvs/ip_vs_ctl.c
index b47e266c6eca88d98df1e67efb28e1923f9497a5,fac8c802b4eaf0605403c1ebf48ac82ef41775c6..fff213eacf2aeda24f15d07eac1b1d4f64df1e34
@@@ -300,7 -300,7 +300,7 @@@ static int ip_vs_svc_hash(struct ip_vs_
        unsigned int hash;
  
        if (svc->flags & IP_VS_SVC_F_HASHED) {
-               pr_err("%s(): request for already hashed, called from %pF\n",
+               pr_err("%s(): request for already hashed, called from %pS\n",
                       __func__, __builtin_return_address(0));
                return 0;
        }
  static int ip_vs_svc_unhash(struct ip_vs_service *svc)
  {
        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
-               pr_err("%s(): request for unhash flagged, called from %pF\n",
+               pr_err("%s(): request for unhash flagged, called from %pS\n",
                       __func__, __builtin_return_address(0));
                return 0;
        }
@@@ -1146,9 -1146,9 +1146,9 @@@ ip_vs_del_dest(struct ip_vs_service *sv
        return 0;
  }
  
 -static void ip_vs_dest_trash_expire(unsigned long data)
 +static void ip_vs_dest_trash_expire(struct timer_list *t)
  {
 -      struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
 +      struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
        struct ip_vs_dest *dest, *next;
        unsigned long now = jiffies;
  
@@@ -2034,12 -2034,16 +2034,16 @@@ static int ip_vs_info_seq_show(struct s
                seq_puts(seq,
                         "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
        } else {
+               struct net *net = seq_file_net(seq);
+               struct netns_ipvs *ipvs = net_ipvs(net);
                const struct ip_vs_service *svc = v;
                const struct ip_vs_iter *iter = seq->private;
                const struct ip_vs_dest *dest;
                struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
                char *sched_name = sched ? sched->name : "none";
  
+               if (svc->ipvs != ipvs)
+                       return 0;
                if (iter->table == ip_vs_svc_table) {
  #ifdef CONFIG_IP_VS_IPV6
                        if (svc->af == AF_INET6)
@@@ -4019,7 -4023,8 +4023,7 @@@ int __net_init ip_vs_control_net_init(s
  
        INIT_LIST_HEAD(&ipvs->dest_trash);
        spin_lock_init(&ipvs->dest_trash_lock);
 -      setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
 -                  (unsigned long) ipvs);
 +      timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
        atomic_set(&ipvs->ftpsvc_counter, 0);
        atomic_set(&ipvs->nullsvc_counter, 0);
        atomic_set(&ipvs->conn_out_counter, 0);
diff --combined net/wireless/nl80211.c
index eb866647a27ac3847f2a6eca3709945808dd70e6,fce2cbe6a19390f7059137ba140296c6d4108d21..bb16f1ec766ead1e65fb6e4196a4278ff09a67a3
@@@ -2130,6 -2130,15 +2130,15 @@@ static int nl80211_parse_chandef(struc
                case NL80211_CHAN_HT40MINUS:
                        cfg80211_chandef_create(chandef, chandef->chan,
                                                chantype);
+                       /* user input for center_freq is incorrect */
+                       if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
+                           chandef->center_freq1 != nla_get_u32(
+                                       info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+                               return -EINVAL;
+                       /* center_freq2 must be zero */
+                       if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
+                           nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+                               return -EINVAL;
                        break;
                default:
                        return -EINVAL;
@@@ -5677,6 -5686,11 +5686,11 @@@ static int nl80211_req_set_reg(struct s
        }
  }
  
+ static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
+ {
+       return reg_reload_regdb();
+ }
  static int nl80211_get_mesh_config(struct sk_buff *skb,
                                   struct genl_info *info)
  {
@@@ -6618,6 -6632,77 +6632,77 @@@ static bool cfg80211_off_channel_oper_a
        return regulatory_pre_cac_allowed(wdev->wiphy);
  }
  
+ static int
+ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
+                        void *request, struct nlattr **attrs,
+                        bool is_sched_scan)
+ {
+       u8 *mac_addr, *mac_addr_mask;
+       u32 *flags;
+       enum nl80211_feature_flags randomness_flag;
+
+       if (!attrs[NL80211_ATTR_SCAN_FLAGS])
+               return 0;
+
+       if (is_sched_scan) {
+               struct cfg80211_sched_scan_request *req = request;
+
+               randomness_flag = wdev ?
+                                 NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+                                 NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+               flags = &req->flags;
+               mac_addr = req->mac_addr;
+               mac_addr_mask = req->mac_addr_mask;
+       } else {
+               struct cfg80211_scan_request *req = request;
+
+               randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+               flags = &req->flags;
+               mac_addr = req->mac_addr;
+               mac_addr_mask = req->mac_addr_mask;
+       }
+
+       *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
+
+       if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+           !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+               return -EOPNOTSUPP;
+
+       if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+               int err;
+
+               if (!(wiphy->features & randomness_flag) ||
+                   (wdev && wdev->current_bss))
+                       return -EOPNOTSUPP;
+
+               err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
+               if (err)
+                       return err;
+       }
+
+       if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
+           !wiphy_ext_feature_isset(wiphy,
+                                    NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
+               return -EOPNOTSUPP;
+
+       if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
+          !wiphy_ext_feature_isset(wiphy,
+                                   NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
+               return -EOPNOTSUPP;
+
+       if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+           !wiphy_ext_feature_isset(wiphy,
+                                    NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
+               return -EOPNOTSUPP;
+
+       if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
+           !wiphy_ext_feature_isset(wiphy,
+                                    NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+               return -EOPNOTSUPP;
+
+       return 0;
+ }
+
  static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
  {
        struct cfg80211_registered_device *rdev = info->user_ptr[0];
                        nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
        }
  
-       if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
-               request->flags = nla_get_u32(
-                       info->attrs[NL80211_ATTR_SCAN_FLAGS]);
-               if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-                   !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
-                       err = -EOPNOTSUPP;
-                       goto out_free;
-               }
-               if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-                       if (!(wiphy->features &
-                                       NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
-                               err = -EOPNOTSUPP;
-                               goto out_free;
-                       }
-                       if (wdev->current_bss) {
-                               err = -EOPNOTSUPP;
-                               goto out_free;
-                       }
-                       err = nl80211_parse_random_mac(info->attrs,
-                                                      request->mac_addr,
-                                                      request->mac_addr_mask);
-                       if (err)
-                               goto out_free;
-               }
-       }
+       err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
+                                      false);
+       if (err)
+               goto out_free;
  
        request->no_cck =
                nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
@@@ -7298,37 -7359,9 +7359,9 @@@ nl80211_parse_sched_scan(struct wiphy *
                       request->ie_len);
        }
  
-       if (attrs[NL80211_ATTR_SCAN_FLAGS]) {
-               request->flags = nla_get_u32(
-                       attrs[NL80211_ATTR_SCAN_FLAGS]);
-               if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-                   !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
-                       err = -EOPNOTSUPP;
-                       goto out_free;
-               }
-               if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-                       u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
-                       if (!wdev) /* must be net-detect */
-                               flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
-                       if (!(wiphy->features & flg)) {
-                               err = -EOPNOTSUPP;
-                               goto out_free;
-                       }
-                       if (wdev && wdev->current_bss) {
-                               err = -EOPNOTSUPP;
-                               goto out_free;
-                       }
-                       err = nl80211_parse_random_mac(attrs, request->mac_addr,
-                                                      request->mac_addr_mask);
-                       if (err)
-                               goto out_free;
-               }
-       }
+       err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+       if (err)
+               goto out_free;
  
        if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
                request->delay =
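The two hunks above replace the duplicated scan-flag validation in nl80211_trigger_scan() and nl80211_parse_sched_scan() with a single helper, nl80211_check_scan_flags(), which takes the request as a void pointer plus an is_sched_scan flag and picks the flags/mac_addr fields out of whichever request structure it was actually given. The compilable sketch below shows only that "one validator, two request types" shape; every name in it (scan_req, sched_scan_req, check_scan_flags, the FLAG_/FEAT_ constants) is invented for illustration.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* two request types that happen to share the fields the validator touches */
struct scan_req       { uint32_t flags; uint8_t mac_addr[6]; };
struct sched_scan_req { uint32_t flags; uint8_t mac_addr[6]; };

#define FLAG_LOW_PRIORITY 0x1u   /* requested behaviour */
#define FEAT_LOW_PRIORITY 0x1u   /* device capability bit */

/* one helper validates both request types; the bool says which one we got */
static int check_scan_flags(uint32_t features, void *request,
                            uint32_t requested_flags, bool is_sched_scan)
{
        uint32_t *flags;

        if (is_sched_scan)
                flags = &((struct sched_scan_req *)request)->flags;
        else
                flags = &((struct scan_req *)request)->flags;

        *flags = requested_flags;

        /* reject a flag the device did not advertise support for */
        if ((*flags & FLAG_LOW_PRIORITY) && !(features & FEAT_LOW_PRIORITY))
                return -1;

        return 0;
}

int main(void)
{
        struct scan_req req = { 0 };

        /* device without the capability: the request is rejected (-1) */
        printf("%d\n", check_scan_flags(0, &req, FLAG_LOW_PRIORITY, false));
        /* device with the capability: accepted (0) */
        printf("%d\n", check_scan_flags(FEAT_LOW_PRIORITY, &req, FLAG_LOW_PRIORITY, false));
        return 0;
}

The cost of the void pointer is that the compiler no longer checks the request type; the boolean restores just enough information for the helper to interpret it correctly, which is why both call sites must pass it accurately.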
@@@ -8932,8 -8965,14 +8965,14 @@@ static int nl80211_connect(struct sk_bu
  
        if (info->attrs[NL80211_ATTR_USE_MFP]) {
                connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+               if (connect.mfp == NL80211_MFP_OPTIONAL &&
+                   !wiphy_ext_feature_isset(&rdev->wiphy,
+                                            NL80211_EXT_FEATURE_MFP_OPTIONAL))
+                       return -EOPNOTSUPP;
                if (connect.mfp != NL80211_MFP_REQUIRED &&
-                   connect.mfp != NL80211_MFP_NO)
+                   connect.mfp != NL80211_MFP_NO &&
+                   connect.mfp != NL80211_MFP_OPTIONAL)
                        return -EINVAL;
        } else {
                connect.mfp = NL80211_MFP_NO;
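The hunk above extends the connect path so that NL80211_MFP_OPTIONAL is accepted, but only when the driver advertises NL80211_EXT_FEATURE_MFP_OPTIONAL; any other unknown value still fails with -EINVAL. A toy standalone sketch of that validation order follows; check_mfp() and its parameters are invented names, not the nl80211 code.

#include <stdio.h>
#include <stdbool.h>

enum mfp { MFP_NO, MFP_REQUIRED, MFP_OPTIONAL };

/* known-but-unsupported value -> -95 (EOPNOTSUPP), unknown value -> -22 (EINVAL) */
static int check_mfp(int mfp, bool dev_supports_optional)
{
        if (mfp == MFP_OPTIONAL && !dev_supports_optional)
                return -95;
        if (mfp != MFP_NO && mfp != MFP_REQUIRED && mfp != MFP_OPTIONAL)
                return -22;
        return 0;
}

int main(void)
{
        printf("%d\n", check_mfp(MFP_OPTIONAL, false)); /* -95: not supported */
        printf("%d\n", check_mfp(MFP_OPTIONAL, true));  /* 0: supported */
        printf("%d\n", check_mfp(7, true));             /* -22: unknown value */
        return 0;
}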
@@@ -12684,6 -12723,12 +12723,12 @@@ static const struct genl_ops nl80211_op
                .policy = nl80211_policy,
                .flags = GENL_ADMIN_PERM,
        },
+       {
+               .cmd = NL80211_CMD_RELOAD_REGDB,
+               .doit = nl80211_reload_regdb,
+               .policy = nl80211_policy,
+               .flags = GENL_ADMIN_PERM,
+       },
        {
                .cmd = NL80211_CMD_GET_MESH_CONFIG,
                .doit = nl80211_get_mesh_config,
@@@ -13812,9 -13857,7 +13857,7 @@@ void nl80211_send_roamed(struct cfg8021
                     info->req_ie)) ||
            (info->resp_ie &&
             nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
-                    info->resp_ie)) ||
-           (info->authorized &&
-            nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
+                    info->resp_ie)))
                goto nla_put_failure;
  
        genlmsg_end(msg, hdr);
                                NL80211_MCGRP_MLME, gfp);
        return;
  
+  nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       nlmsg_free(msg);
+ }
+
+ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+                                 struct net_device *netdev, const u8 *bssid)
+ {
+       struct sk_buff *msg;
+       void *hdr;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return;
+
+       hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
+       if (!hdr) {
+               nlmsg_free(msg);
+               return;
+       }
+
+       if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+                               NL80211_MCGRP_MLME, GFP_KERNEL);
+       return;
+
   nla_put_failure:
        genlmsg_cancel(msg, hdr);
        nlmsg_free(msg);
@@@ -14201,7 -14274,7 +14274,7 @@@ static bool __nl80211_unexpected_frame(
        struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
        struct sk_buff *msg;
        void *hdr;
 -      u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
 +      u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);
  
        if (!nlportid)
                return false;
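The last hunk replaces ACCESS_ONCE() with READ_ONCE() when sampling wdev->ap_unexpected_nlportid. Both force the compiler to emit exactly one load through a volatile-qualified access, so the value cannot be re-read (and possibly change) between the zero check and the later use; READ_ONCE()/WRITE_ONCE() are the spellings the kernel has standardised on. The sketch below uses a simplified userspace approximation of the scalar case, not the kernel's actual macro.

#include <stdio.h>

/* simplified userspace approximation of the kernel's scalar READ_ONCE() */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

static unsigned int ap_unexpected_nlportid = 1234;  /* stands in for the wdev field */

int main(void)
{
        /* take one snapshot; the zero check and the later use see the same value */
        unsigned int nlportid = READ_ONCE(ap_unexpected_nlportid);

        if (!nlportid)
                return 0;
        printf("notify portid %u\n", nlportid);
        return 0;
}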